From 3210563e42d54681c4bf83812b23f9603306589c Mon Sep 17 00:00:00 2001 From: Charles Teh Date: Tue, 21 Nov 2023 19:18:11 +0800 Subject: [PATCH] Add (news bites) --- app/Console/Kernel.php | 25 +- app/Helpers/FirstParty/OpenAI/OpenAI.php | 10 + .../FirstParty/OpmlParser/OpmlParser.php | 315 ++++++++++++++++++ app/Helpers/Global/string_helper.php | 17 + .../Controllers/Front/FrontHomeController.php | 13 +- .../Controllers/Front/FrontListController.php | 89 ++--- .../Controllers/Front/FrontPostController.php | 2 +- app/Http/Controllers/Tests/TestController.php | 21 +- ...Job.php => BrowseDFSAndWriteWithAIJob.php} | 2 +- app/Jobs/BrowseRSSPostJob.php | 38 +++ app/Jobs/BrowseSingleRSSJob.php | 61 ++++ app/Jobs/CrawlRssPostJob.php | 35 ++ app/Jobs/ParseRssPostMetadataJob.php | 35 ++ app/Jobs/Tasks/BrowseRSSLatestNewsTask.php | 60 ++++ app/Jobs/Tasks/CrawlRssPostTask.php | 167 ++++++++++ app/Jobs/Tasks/FillPostMetadataTask.php | 4 +- app/Jobs/Tasks/ParseDFSNewsTask.php | 5 +- app/Jobs/Tasks/ParseRssPostMetadataTask.php | 117 +++++++ app/Jobs/Tasks/PublishIndexPostTask.php | 11 +- app/Models/RssPost.php | 69 ++++ app/Notifications/PostWasPublished.php | 7 +- composer.json | 1 + composer.lock | 139 +++++++- config/feed-reader.php | 71 ++++ config/platform/global.php | 96 +++++- ...23_09_22_154137_create_serp_urls_table.php | 1 + .../2023_11_19_121001_create_posts_table.php | 1 + ...20253_add_image_ref_url_to_posts_table.php | 28 -- ...23_11_21_073326_create_rss_posts_table.php | 56 ++++ resources/sass/app-front.scss | 2 +- .../views/front/partials/about.blade.php | 5 +- .../views/front/partials/news_bites.blade.php | 86 +++++ resources/views/front/post_list.blade.php | 4 +- resources/views/front/rss_post_list.blade.php | 55 +++ resources/views/front/welcome.blade.php | 68 ++-- routes/tests.php | 16 +- routes/web.php | 2 + 37 files changed, 1581 insertions(+), 153 deletions(-) create mode 100644 app/Helpers/FirstParty/OpmlParser/OpmlParser.php rename app/Jobs/{BrowseAndWriteWithAIJob.php => BrowseDFSAndWriteWithAIJob.php} (94%) create mode 100644 app/Jobs/BrowseRSSPostJob.php create mode 100644 app/Jobs/BrowseSingleRSSJob.php create mode 100644 app/Jobs/CrawlRssPostJob.php create mode 100644 app/Jobs/ParseRssPostMetadataJob.php create mode 100644 app/Jobs/Tasks/BrowseRSSLatestNewsTask.php create mode 100644 app/Jobs/Tasks/CrawlRssPostTask.php create mode 100644 app/Jobs/Tasks/ParseRssPostMetadataTask.php create mode 100644 app/Models/RssPost.php create mode 100644 config/feed-reader.php delete mode 100644 database/migrations/2023_11_20_020253_add_image_ref_url_to_posts_table.php create mode 100644 database/migrations/2023_11_21_073326_create_rss_posts_table.php create mode 100644 resources/views/front/partials/news_bites.blade.php create mode 100644 resources/views/front/rss_post_list.blade.php diff --git a/app/Console/Kernel.php b/app/Console/Kernel.php index 96ec036..aa4bc97 100644 --- a/app/Console/Kernel.php +++ b/app/Console/Kernel.php @@ -2,7 +2,8 @@ namespace App\Console; -use App\Jobs\BrowseAndWriteWithAIJob; +use App\Jobs\BrowseDFSAndWriteWithAIJob; +use App\Jobs\BrowseRSSPostJob; use App\Jobs\PublishIndexPostJob; use App\Models\Post; use Illuminate\Console\Scheduling\Schedule; @@ -15,20 +16,24 @@ class Kernel extends ConsoleKernel */ protected function schedule(Schedule $schedule): void { - $schedule->command('sitemap:generate')->daily()->name('sitemap-generate-daily'); + $schedule->command('sitemap:generate')->everySixHours()->name('sitemap-generate-every-six-hours'); $schedule->call(function () { - BrowseAndWriteWithAIJob::dispatch()->onQueue('default')->onConnection('default'); - })->everySixHours()->name('write-a-job-6hrs'); + BrowseRSSPostJob::dispatch(1)->onQueue('default')->onConnection('default'); + })->hourly()->name('browse-rss-post-job-hourly'); - $schedule->call(function () { - $future_post = Post::whereNotNull('published_at')->where('status', 'future')->where('published_at', '<=', now())->orderBy('published_at', 'ASC')->first(); + // $schedule->call(function () { + // BrowseDFSAndWriteWithAIJob::dispatch()->onQueue('default')->onConnection('default'); + // })->everySixHours()->name('write-a-job-6hrs'); - if (! is_null($future_post)) { - PublishIndexPostJob::dispatch($future_post->id)->onQueue('default')->onConnection('default'); - } + // $schedule->call(function () { + // $future_post = Post::whereNotNull('published_at')->where('status', 'future')->where('published_at', '<=', now())->orderBy('published_at', 'ASC')->first(); - })->everyMinute()->name('schedule-future-post'); + // if (! is_null($future_post)) { + // PublishIndexPostJob::dispatch($future_post->id)->onQueue('default')->onConnection('default'); + // } + + // })->everyMinute()->name('schedule-future-post'); } diff --git a/app/Helpers/FirstParty/OpenAI/OpenAI.php b/app/Helpers/FirstParty/OpenAI/OpenAI.php index ae2011f..0566c76 100644 --- a/app/Helpers/FirstParty/OpenAI/OpenAI.php +++ b/app/Helpers/FirstParty/OpenAI/OpenAI.php @@ -8,6 +8,16 @@ class OpenAI { + public static function getRssPostMeta($user_prompt, $model_max_tokens = 1536, $timeout = 60) + { + + $openai_config = 'openai-gpt-4-turbo'; + + $system_prompt = "Based on given article, populate the following in valid JSON format\n{\n\"title\":\"(Title based on article)\",\n\"keywords\":[\"(Important keywords in 1-2 words per keyword)\"],\n\"category\":\"(Updates|Opinions|Features|New Launches|How Tos|Reviews)\",\n\"summary\":\"(Summarise article in 60-90 words to help readers understand what article is about)\",\n\"entities\":[(List of companies, brands that are considered as main entites in 1-2 words. per entity)],\n\"society_impact\":\"(Explain in 30-50 words how this article content's can impact society on technological aspect)\",\n\"society_impact_level:\"(low|medium|high)\"\n}"; + + return self::getChatCompletion($user_prompt, $system_prompt, $openai_config, $model_max_tokens, $timeout); + } + public static function getArticleMeta($user_prompt, $model_max_tokens = 1536, $timeout = 60) { diff --git a/app/Helpers/FirstParty/OpmlParser/OpmlParser.php b/app/Helpers/FirstParty/OpmlParser/OpmlParser.php new file mode 100644 index 0000000..81be0a2 --- /dev/null +++ b/app/Helpers/FirstParty/OpmlParser/OpmlParser.php @@ -0,0 +1,315 @@ + 'id', // Unique element ID + 'TYPE' => 'type', // Element type (audio, feed, playlist, etc) + 'URL' => 'url', // Location of the item. Depending on the value of the type attribute, this can be either a single audio stream or audio playlist, a remote OPML file containing a playlist of audio items, or a remote OPML file to browse. + 'HTMLURL' => 'html_url', // Top-level link element + 'TEXT' => 'title', // Specifies the title of the item. + 'TITLE' => 'title', // Specifies the title of the item. + 'LANGUAGE' => 'language', // The value of the top-level language element + 'TARGET' => 'link_target', // The target window of the link + 'VERSION' => 'version', // Varies depending on the version of RSS that's being supplied. RSS1 for RSS 1.0; RSS for 0.91, 0.92 or 2.0; scriptingNews for scriptingNews format. There are no known values for Atom feeds, but they certainly could be provided. + 'DESCRIPTION' => 'description', // The top-level description element from the feed. + 'XMLURL' => 'xml_url', // The http address of the feed + 'CREATED' => 'created', // Date-time that the outline node was created + 'IMAGEHREF' => 'imageHref', // A link to an image related to the element (.e.g. a song poster) + 'ICON' => 'icon', // A link to an icon related to the element (.e.g. a radio-station's icon) + 'F' => 'song', // When used in OPML playlists, it's used to specify the song's filename. + 'BITRATE' => 'bitrate', // Used to specify the bitrate of an audio stream, in kbps. + 'MIME' => 'mime', // Enter the MIME type of the stream/file. + 'DURATION' => 'duration', // If the item is not a live radio stream, set duration to the playback duration in seconds to ensure the progress bar is displayed correctly. This is especially helpful for VBR files where our bitrate detection may not work properly. + 'LISTENERS' => 'listeners', // Used to display the number of listeners currently listening to an audio stream. + 'CURRENT_TRACK' => 'current_track', // Used to display the track that was most recently playing on a radio station. + 'GENRE' => 'genre', //The genre of a stream may be specified with this attribute. + 'SOURCE' => 'source', // The source of the audio. This is currently used to describe, for instance, how a concert was recorded. + ]; + + /** + * Constructor. + * + * @return OPML_Parser + */ + public function OPML_Parser() + { + $this->parser = null; + $this->opml_contents = []; + $this->position = 0; + } + + /** + * OPML_Parser::rewind() + * This rewinds the iterator to the beginning. + */ + public function rewind(): void + { + $this->position = 0; + } + + /** + * OPML_Parser::current() + * Return the current element + * + * @return mixed The current element + */ + public function current() + { + return $this->opml_contents[$this->position]; + } + + /** + * OPML_Parser::key() + * Return the key of the current element + * + * @return scalar The key of the current element + */ + public function key() + { + return $this->position; + } + + /** + * OPML_Parser::next() + * Move he iterator to the next entry. + */ + public function next(): void + { + $this->position++; + } + + /** + * OPML_Parser::valid() + * Checks if current position is valid + * + * @return bool Returns TRUE if the current position is valid (if the element exists) + */ + public function valid(): bool + { + return isset($this->opml_contents[$this->position]); + } + + /** + * OPML_Parser::getOPMLFile() + * Fetch Contents of Page (from file or URL). Queries are performed using cURL and, if not available, using file_get_contents() + * + * @param string $location The location (file or URL) of the OPML file + * @param resource $context stream context from `stream_context_create()`. Contexts can be passed to most filesystem related stream creation functions (i.e. fopen(), file(), file_get_contents(), etc...). + * @return string contents of the page at $location + */ + protected function getOPMLFile($location = '', $context = null) + { + if (in_array('curl', get_loaded_extensions())) { + $options = [ + CURLOPT_RETURNTRANSFER => true, // return web page + CURLOPT_HEADER => false, // don't return headers + CURLOPT_FOLLOWLOCATION => true, // follow redirects + CURLOPT_MAXREDIRS => 10, // stop after 10 redirects + CURLOPT_ENCODING => '', // handle compressed + CURLOPT_USERAGENT => 'test', // name of client + CURLOPT_AUTOREFERER => true, // set referrer on redirect + CURLOPT_CONNECTTIMEOUT => 120, // time-out on connect + CURLOPT_TIMEOUT => 120, // time-out on response + ]; + + $ch = curl_init($location); + curl_setopt_array($ch, $options); + $contents = curl_exec($ch); + } else { + $contents = file_get_contents($location, false, $context); + } + + return $contents; + } + + /** + * OPML_Parser::ParseElementStart() + * The XML tag-open handler. It is used here to parse and store attributes from outline tags + * + * @param resource $parser A reference to the XML parser calling the handler. + * @param string $tagName The name of the element (tag) for which this handler is called. If case-folding is in effect for this parser, the element name will be in uppercase letters. + * @param array $attrs The element's attributes (if any).The keys of this array are the attribute names, the values are the attribute values.Attribute names are case-folded on the same criteria as element names. + */ + protected function ParseElementStart($parser, $tagName, $attrs) + { + $map = $this->opml_map_vars; + + // Parse attributes if entered an "outline" tag + if ($tagName == 'OUTLINE') { + $node = []; + + foreach (array_keys($this->opml_map_vars) as $key) { + if (isset($attrs[$key])) { + $node[$key] = $attrs[$key]; + } + } + + $this->opml_contents[] = $node; + } + } + + /** + * OPML_Parser::ParseElementEnd() + * The XML tag-close handler. It is used for processing closed tags (not used in this class but can be overloaded in child classes) + * + * @param resource $parser A reference to the XML parser calling the handler. + * @param string $tagName The name of the element (tag) for which this handler is called. If case-folding is in effect for this parser, the element name will be in uppercase letters. + */ + protected function ParseElementEnd($parser, $tagName) + { + // nothing to do. + } + + /** + * OPML_Parser::ParseElementCharData() + * The XML char data handler. It is used for processing char data (not used in this class but can be overloaded in child classes) + * + * @param resource $parser A reference to the XML parser calling the handler. + * @param string $data contains the character data as a string. Character data handler is called for every piece of a text in the XML document. It can be called multiple times inside each fragment (e.g. for non-ASCII strings). + */ + protected function ParseElementCharData($parser, $data) + { + // nothing to do. + } + + /** + * OPML_Parser::Parser() + * Parse the OPML data (resulting data stored in $opml_contents) + * + * @param string $XMLdata A reference to the XML parser calling the handler. + */ + protected function Parser($XMLdata) + { + // Reset iterator + $this->position = 0; + + $this->parser = xml_parser_create(); + + xml_set_object($this->parser, $this); + + xml_set_element_handler($this->parser, [&$this, 'ParseElementStart'], [&$this, 'ParseElementEnd']); + + xml_set_character_data_handler($this->parser, [&$this, 'ParseElementCharData']); + + xml_parse($this->parser, $XMLdata); + + xml_parser_free($this->parser); + } + + /** + * OPML_Parser::ParseLocation() + * Parse contents from OPML file or URL + * + * @param string $location The location (file or URL) of the OPML file + * @param resource $context stream context from `stream_context_create()`. Contexts can be passed to most filesystem related stream creation functions (i.e. fopen(), file(), file_get_contents(), etc...). + */ + public function ParseLocation($location, $context = null) + { + $this->unparsed_opml = trim($this->getOPMLFile($location, $context)); + $this->Parser($this->unparsed_opml); + } + + /** + * OPML_Parser::ParseOPML() + * Parse contents from OPML string + * + * @param string $opml The unparsed OPML string + */ + public function ParseOPML($opml) + { + $this->unparsed_opml = trim($opml); + $this->Parser($this->unparsed_opml); + } + + /** + * OPML_Parser::getUnparsedOPML() + * Get the unparsed OPML string + * + * @return string The unparsed OPML string + */ + public function getUnparsedOPML() + { + return $this->unparsed_opml; + } + + /** + * OPML_Parser::setAttribute() + * Add (or replace) an OPML attribute to parser's attribute list + * + * @param string $attribute The new attribute to parse (whitespace replaced by underscores) + * @param string $mapped_attribute The attribute's name to be returned. Defaults to the same attribute's name (in lowercase form) + */ + public function setAttribute($attribute, $mapped_attribute = '') + { + $attribute = strtoupper(preg_replace('/\s+/', '_', trim($attribute))); + if ($mapped_attribute != '') { + $mapped_attribute = strtoupper(preg_replace('/\s+/', '_', trim($mapped_attribute))); + } else { + $mapped_attribute = strtolower($attribute); + } + + $this->opml_map_vars[$attribute] = $mapped_attribute; + } + + /** + * OPML_Parser::unsetAttribute() + * Remove an OPML attribute to parser's attribute list + * + * @param string $attribute The attribute to remove (whitespace replaced by underscores) + */ + public function unsetAttribute($attribute) + { + $attribute = strtoupper(preg_replace('/\s+/', '_', trim($attribute))); + + unset($this->opml_map_vars[$attribute]); + } +} diff --git a/app/Helpers/Global/string_helper.php b/app/Helpers/Global/string_helper.php index 6ed3c5a..2557feb 100644 --- a/app/Helpers/Global/string_helper.php +++ b/app/Helpers/Global/string_helper.php @@ -83,6 +83,13 @@ function markdown_min_read($markdown) } } +if (! function_exists('min_read')) { + function min_read($string) + { + return read_duration(plain_text($string)); + } +} + if (! function_exists('unslug')) { function unslug($slug, $delimiter = '-') { @@ -150,6 +157,16 @@ function is_empty($value): bool } } +if (! function_exists('get_domain_from_url')) { + function get_domain_from_url($url) + { + $parse = parse_url($url); + + return $parse['host']; + } +} + + if (! function_exists('get_country_name_by_iso')) { function get_country_name_by_iso($country_iso) { diff --git a/app/Http/Controllers/Front/FrontHomeController.php b/app/Http/Controllers/Front/FrontHomeController.php index 9961ddc..d42d3b7 100644 --- a/app/Http/Controllers/Front/FrontHomeController.php +++ b/app/Http/Controllers/Front/FrontHomeController.php @@ -4,6 +4,7 @@ use App\Http\Controllers\Controller; use App\Models\Post; +use App\Models\RssPost; use Artesaos\SEOTools\Facades\SEOMeta; use Artesaos\SEOTools\Facades\SEOTools; use GrahamCampbell\Markdown\Facades\Markdown; @@ -18,13 +19,15 @@ public function home(Request $request) // $query->whereNotIn('id', [$featured_post?->id]); // })->where('status', 'publish')->orderBy('published_at', 'desc')->limit(5)->get(); - $featured_posts = Post::where('status', 'publish')->where('published_at', '<=', now())->orderBy('published_at', 'desc')->limit(3)->get(); + // $featured_posts = Post::where('status', 'publish')->where('published_at', '<=', now())->orderBy('published_at', 'desc')->limit(3)->get(); - $latest_posts = Post::where(function ($query) use ($featured_posts) { - $query->whereNotIn('id', $featured_posts->pluck('id')->toArray()); - })->where('status', 'publish')->where('published_at', '<=', now())->orderBy('published_at', 'desc')->limit(10)->get(); + // $latest_posts = Post::where(function ($query) use ($featured_posts) { + // $query->whereNotIn('id', $featured_posts->pluck('id')->toArray()); + // })->where('status', 'publish')->where('published_at', '<=', now())->orderBy('published_at', 'desc')->limit(10)->get(); - return response(view('front.welcome', compact('featured_posts', 'latest_posts')), 200); + $rss_posts = RssPost::where('status', 'published')->orderBy('published_at', 'desc')->paginate(30); + + return response(view('front.welcome', compact('rss_posts')), 200); } public function terms(Request $request) diff --git a/app/Http/Controllers/Front/FrontListController.php b/app/Http/Controllers/Front/FrontListController.php index 3a5ec02..8dec6a6 100644 --- a/app/Http/Controllers/Front/FrontListController.php +++ b/app/Http/Controllers/Front/FrontListController.php @@ -4,8 +4,7 @@ use App\Http\Controllers\Controller; use App\Models\Category; -use App\Models\Post; -use App\Models\PostCategory; +use App\Models\RssPost; use Artesaos\SEOTools\Facades\SEOTools; use Illuminate\Http\Request; use JsonLd\Context; @@ -13,15 +12,24 @@ class FrontListController extends Controller { public function search(Request $request) + { + if (is_empty($request->input('query'))) { + return redirect()->back(); + } + + return redirect()->to(route('front.search.results',['query' => $request->input('query')])); + } + + public function searchResults(Request $request, $query) { $page_type = 'search'; - $query = $request->get('query', ''); + $query = strtolower($query); $breadcrumbs = collect([ ['name' => 'Home', 'url' => route('front.home')], - ['name' => 'Search', 'url' => null], - ['name' => $query, 'url' => url()->current()], + ['name' => 'News Bites', 'url' => route('front.search.results',['query' => $query])], + ['name' => $query, 'url' => null], ]); $title = 'Latest News about '.ucwords($query).' in FutureWalker'; @@ -32,14 +40,13 @@ public function search(Request $request) SEOTools::jsonLd(); SEOTools::setTitle($title, false); - // Use full-text search capabilities of your database - // For example, using MySQL's full-text search with MATCH...AGAINST - $posts = Post::with('category') - ->where('status', 'publish') - ->whereRaw("to_tsvector('english', title || ' ' || bites) @@ to_tsquery('english', ?)", [str_replace(' ', ' & ', $query)]) - ->where('published_at', '<=', now()) - ->orderBy('published_at', 'desc') - ->cursorPaginate(10); +$rss_posts = RssPost::with('category') + ->where('status', 'published') + ->whereRaw("to_tsvector('english', title || ' ' || bites || ' ' || body) @@ plainto_tsquery('english', ?)", [trim(preg_replace('/\s+/', ' ', $query))]) + ->where('published_at', '<=', now()) + ->orderBy('published_at', 'desc') + ->cursorPaginate(60); + // breadcrumb json ld $listItems = []; @@ -55,7 +62,7 @@ public function search(Request $request) 'itemListElement' => $listItems, ]); - return view('front.post_list', compact('posts', 'breadcrumbs', 'breadcrumb_context', 'title', 'page_type')); + return view('front.rss_post_list', compact('rss_posts', 'breadcrumbs', 'breadcrumb_context', 'title', 'page_type')); } public function index(Request $request) @@ -64,10 +71,10 @@ public function index(Request $request) $breadcrumbs = collect([ ['name' => 'Home', 'url' => route('front.home')], - ['name' => 'Latest News', 'url' => null], // or you can set a route for Latest News if there's a specific one + ['name' => 'News Bites', 'url' => null], // or you can set a route for Latest News if there's a specific one ]); - $title = 'Latest News from FutureWalker'; + $title = 'Latest News Bites from FutureWalker'; SEOTools::metatags(); SEOTools::twitter(); @@ -75,10 +82,10 @@ public function index(Request $request) SEOTools::jsonLd(); SEOTools::setTitle($title, false); - $posts = Post::with('category')->where('status', 'publish') + $rss_posts = RssPost::with('category')->where('status', 'published') ->where('published_at', '<=', now()) ->orderBy('published_at', 'desc') - ->cursorPaginate(10) ?? collect(); + ->cursorPaginate(60) ?? collect(); // breadcrumb json ld $listItems = []; @@ -90,52 +97,46 @@ public function index(Request $request) ]; } - //dd($posts); + //dd($rss_posts); $breadcrumb_context = Context::create('breadcrumb_list', [ 'itemListElement' => $listItems, ]); - return view('front.post_list', compact('posts', 'breadcrumbs', 'breadcrumb_context', 'page_type')); + return view('front.rss_post_list', compact('rss_posts', 'breadcrumbs', 'breadcrumb_context', 'page_type')); } public function category(Request $request, $category_slug) { - $page_type = 'default'; + $page_type = 'category'; - // Fetch the category by slug $category = Category::where('slug', $category_slug)->first(); - // Check if the category exists - if (! $category) { - abort(404, 'Category not found'); + if (is_null($category)) + { + abort(404); } - // Breadcrumb logic - $breadcrumbs = collect([['name' => 'Home', 'url' => route('front.home')]]); - foreach ($category->ancestors as $ancestor) { - $breadcrumbs->push(['name' => $ancestor->name, 'url' => route('front.category', $ancestor->slug)]); - } - $breadcrumbs->push(['name' => $category->name, 'url' => route('front.category', $category->slug)]); - // Get the IDs of the category and its descendants - $categoryIds = $category->descendants->pluck('id')->push($category->id); + $breadcrumbs = collect([ + ['name' => 'Home', 'url' => route('front.home')], + ['name' => $category->name, 'url' => null], + ]); - // Get the posts associated with these category IDs - $postIds = PostCategory::whereIn('category_id', $categoryIds)->pluck('post_id'); - $posts = Post::whereIn('id', $postIds) - ->where('published_at', '<=', now()) - ->where('status', 'publish') - ->orderBy('published_at', 'desc') - ->cursorPaginate(10); + $title = 'Latest News from FutureWalker'; - $title = $category->name.' News from FutureWalker'; SEOTools::metatags(); SEOTools::twitter(); SEOTools::opengraph(); - SEOTools::setTitle($title, false); SEOTools::jsonLd(); + SEOTools::setTitle($title, false); + + $rss_posts = RssPost::with('category')->where('status', 'published') + ->where('category_id', $category->id) + ->where('published_at', '<=', now()) + ->orderBy('published_at', 'desc') + ->cursorPaginate(60) ?? collect(); // breadcrumb json ld $listItems = []; @@ -147,10 +148,12 @@ public function category(Request $request, $category_slug) ]; } + //dd($rss_posts); + $breadcrumb_context = Context::create('breadcrumb_list', [ 'itemListElement' => $listItems, ]); - return view('front.post_list', compact('category', 'posts', 'breadcrumbs', 'breadcrumb_context', 'page_type')); + return view('front.rss_post_list', compact('rss_posts', 'breadcrumbs', 'breadcrumb_context', 'page_type','category')); } } diff --git a/app/Http/Controllers/Front/FrontPostController.php b/app/Http/Controllers/Front/FrontPostController.php index 6a30d35..e861339 100644 --- a/app/Http/Controllers/Front/FrontPostController.php +++ b/app/Http/Controllers/Front/FrontPostController.php @@ -28,7 +28,7 @@ public function redirect(Request $request, $slug) public function index(Request $request, $category_slug, $slug) { - $post = Post::where('slug', $slug)->whereIn('status', ['publish','future'])->first(); + $post = Post::where('slug', $slug)->whereIn('status', ['publish', 'future'])->first(); if (is_null($post)) { return abort(404); diff --git a/app/Http/Controllers/Tests/TestController.php b/app/Http/Controllers/Tests/TestController.php index 88ae48f..eab0050 100644 --- a/app/Http/Controllers/Tests/TestController.php +++ b/app/Http/Controllers/Tests/TestController.php @@ -4,22 +4,31 @@ use App\Helpers\FirstParty\ImageGen\ImageGen; use App\Http\Controllers\Controller; +use App\Jobs\Tasks\BrowseRSSLatestNewsTask; +use App\Models\Post; +use App\Notifications\PostWasPublished; use Illuminate\Http\Request; +use Illuminate\Support\Facades\Notification; use LaravelFreelancerNL\LaravelIndexNow\Facades\IndexNow; use LaravelGoogleIndexing; -use App\Models\Post; - -use Illuminate\Support\Facades\Notification; -use App\Notifications\PostWasPublished; class TestController extends Controller { + public function opml(Request $request) + { + $raw_posts = BrowseRSSLatestNewsTask::handleSingle('https://hnrss.org/newest?q=ai', 240); + + foreach ($raw_posts as $raw_post) { + dump($raw_post); + } + + } public function notification(Request $request) { - $post = Post::find(1); + $post = Post::find(1); - Notification::route('facebook','default')->notify(new PostWasPublished($post)); + Notification::route('facebook', 'default')->notify(new PostWasPublished($post)); } public function imageGen(Request $request) diff --git a/app/Jobs/BrowseAndWriteWithAIJob.php b/app/Jobs/BrowseDFSAndWriteWithAIJob.php similarity index 94% rename from app/Jobs/BrowseAndWriteWithAIJob.php rename to app/Jobs/BrowseDFSAndWriteWithAIJob.php index 6eff6db..23f042e 100644 --- a/app/Jobs/BrowseAndWriteWithAIJob.php +++ b/app/Jobs/BrowseDFSAndWriteWithAIJob.php @@ -11,7 +11,7 @@ use Illuminate\Queue\InteractsWithQueue; use Illuminate\Queue\SerializesModels; -class BrowseAndWriteWithAIJob implements ShouldQueue +class BrowseDFSAndWriteWithAIJob implements ShouldQueue { use Dispatchable, InteractsWithQueue, Queueable, SerializesModels; diff --git a/app/Jobs/BrowseRSSPostJob.php b/app/Jobs/BrowseRSSPostJob.php new file mode 100644 index 0000000..a1136dd --- /dev/null +++ b/app/Jobs/BrowseRSSPostJob.php @@ -0,0 +1,38 @@ +hours = $hours; + } + + /** + * Execute the job. + */ + public function handle(): void + { + $rss_urls = config('platform.global.rss'); + + foreach ($rss_urls as $rss_url) { + BrowseSingleRSSJob::dispatch($rss_url, $this->hours); + } + } +} diff --git a/app/Jobs/BrowseSingleRSSJob.php b/app/Jobs/BrowseSingleRSSJob.php new file mode 100644 index 0000000..b18007f --- /dev/null +++ b/app/Jobs/BrowseSingleRSSJob.php @@ -0,0 +1,61 @@ +rss_url = $rss_url; + + $this->hours = $hours; + } + + /** + * Execute the job. + */ + public function handle(): void + { + $raw_posts = BrowseRSSLatestNewsTask::handleSingle($this->rss_url, $this->hours); + + foreach ($raw_posts as $raw_post) { + $rss_post = RssPost::where('post_url', $raw_post->link)->first(); + + if (is_null($rss_post)) { + $rss_post = new RssPost; + $rss_post->post_url = $raw_post->link; + $rss_post->source = $raw_post->source; + $rss_post->source_url = $raw_post->source_url; + $rss_post->title = remove_newline($raw_post->title); + $rss_post->slug = str_slug(remove_newline($raw_post->title)); + $rss_post->published_at = $raw_post->date; + $rss_post->status = 'draft'; + + if ($rss_post->save()) { + CrawlRssPostJob::dispatch($rss_post->id)->onConnection('default')->onQueue('default'); + } + } + + } + + } +} diff --git a/app/Jobs/CrawlRssPostJob.php b/app/Jobs/CrawlRssPostJob.php new file mode 100644 index 0000000..ffb2d79 --- /dev/null +++ b/app/Jobs/CrawlRssPostJob.php @@ -0,0 +1,35 @@ +rss_post_id = $rss_post_id; + } + + /** + * Execute the job. + */ + public function handle(): void + { + CrawlRssPostTask::handle($this->rss_post_id); + } +} diff --git a/app/Jobs/ParseRssPostMetadataJob.php b/app/Jobs/ParseRssPostMetadataJob.php new file mode 100644 index 0000000..049eca3 --- /dev/null +++ b/app/Jobs/ParseRssPostMetadataJob.php @@ -0,0 +1,35 @@ +rss_post_id = $rss_post_id; + } + + /** + * Execute the job. + */ + public function handle(): void + { + ParseRssPostMetadataTask::handle($this->rss_post_id); + } +} diff --git a/app/Jobs/Tasks/BrowseRSSLatestNewsTask.php b/app/Jobs/Tasks/BrowseRSSLatestNewsTask.php new file mode 100644 index 0000000..8ad1f28 --- /dev/null +++ b/app/Jobs/Tasks/BrowseRSSLatestNewsTask.php @@ -0,0 +1,60 @@ +get_items() as $item) { + $post_datetime = Carbon::parse($item->get_date(\DateTime::ATOM)); + + if (! $post_datetime->isBetween(now()->subHours($hours), now())) { + continue; + } + + $title = trim($item->get_title()); + $description = trim($item->get_content()); + + $raw_posts[] = (object) [ + 'source' => $f->get_title(), + 'source_url' => $rss_url, + 'title' => $title, + 'link' => $item->get_link(), + 'description' => $description, + 'date' => $post_datetime, + 'category' => $item->get_category()?->term, + ]; + } + + unset($f); + + return $raw_posts; + + } +} diff --git a/app/Jobs/Tasks/CrawlRssPostTask.php b/app/Jobs/Tasks/CrawlRssPostTask.php new file mode 100644 index 0000000..c32159b --- /dev/null +++ b/app/Jobs/Tasks/CrawlRssPostTask.php @@ -0,0 +1,167 @@ + $user_agent, + ]) + ->withOptions([ + 'proxy' => get_smartproxy_rotating_server(), + 'timeout' => 10, + 'verify' => false, + ]) + ->get($rss_post->post_url); + + if ($response->successful()) { + $raw_html = $response->body(); + $costs['unblocker'] = calculate_smartproxy_cost(round(strlen($raw_html) / 1024, 2), 'rotating_global'); + } else { + $raw_html = null; + $response->throw(); + } + + } catch (Exception $e) { + $raw_html = null; + } + + if (! is_empty($raw_html)) { + $rss_post->body = self::getMarkdownFromHtml($raw_html); + } else { + $rss_post->body = 'EMPTY CONTENT'; + } + + if ((is_empty($rss_post->body)) || ($rss_post->body == 'EMPTY CONTENT') || (strlen($rss_post->body) < 800)){ + $rss_post->status = 'blocked'; + } + + if ($rss_post->save()) { + + if (! in_array($rss_post->status, ['blocked', 'trashed'])) { + ParseRssPostMetadataJob::dispatch($rss_post->id)->onConnection('default')->onQueue('default'); + } + } + } + + private static function getMarkdownFromHtml($html) + { + + $converter = new HtmlConverter([ + 'strip_tags' => true, + 'strip_placeholder_links' => true, + ]); + + $html = self::cleanHtml($html); + + $markdown = $converter->convert($html); + + //dd($markdown); + + $markdown = self::reverseLTGT($markdown); + + $markdown = self::normalizeNewLines($markdown); + + $markdown = self::removeDuplicateLines($markdown); + + return html_entity_decode(markdown_to_plaintext($markdown)); + } + + private static function reverseLTGT($input) + { + $output = str_replace('<', '<', $input); + $output = str_replace('>', '>', $output); + + return $output; + } + + private static function removeDuplicateLines($string) + { + $lines = explode("\n", $string); + $uniqueLines = array_unique($lines); + + return implode("\n", $uniqueLines); + } + + private static function normalizeNewLines($content) + { + // Split the content by lines + $lines = explode("\n", $content); + + $processedLines = []; + + for ($i = 0; $i < count($lines); $i++) { + $line = trim($lines[$i]); + + // If the line is an image markdown + if (preg_match("/^!\[.*\]\(.*\)$/", $line)) { + // And if the next line is not empty and not another markdown structure + if (isset($lines[$i + 1]) && ! empty(trim($lines[$i + 1])) && ! preg_match('/^[-=#*&_]+$/', trim($lines[$i + 1]))) { + $line .= ' '.trim($lines[$i + 1]); + $i++; // Skip the next line as we're merging it + } + } + + // Add line to processedLines if it's not empty + if (! empty($line)) { + $processedLines[] = $line; + } + } + + // Collapse excessive newlines + $result = preg_replace("/\n{3,}/", "\n\n", implode("\n", $processedLines)); + + // Detect and replace the pattern + $result = preg_replace('/^(!\[.*?\]\(.*?\))\s*\n\s*([^\n!]+)/m', '$1 $2', $result); + + // Replace multiple spaces with a dash separator + $result = preg_replace('/ {2,}/', ' - ', $result); + + return $result; + } + + private static function cleanHtml($htmlContent) + { + $crawler = new Crawler($htmlContent); + + // Define tags to remove completely + $tagsToRemove = ['script', 'style', 'svg', 'picture', 'form', 'footer', 'nav', 'aside']; + + foreach ($tagsToRemove as $tag) { + $crawler->filter($tag)->each(function ($node) { + foreach ($node as $child) { + $child->parentNode->removeChild($child); + } + }); + } + + // Replace tags with their inner content + $crawler->filter('span')->each(function ($node) { + $replacement = new \DOMText($node->text()); + + foreach ($node as $child) { + $child->parentNode->replaceChild($replacement, $child); + } + }); + + return $crawler->outerHtml(); + } +} diff --git a/app/Jobs/Tasks/FillPostMetadataTask.php b/app/Jobs/Tasks/FillPostMetadataTask.php index 8b8b98d..0b5232a 100644 --- a/app/Jobs/Tasks/FillPostMetadataTask.php +++ b/app/Jobs/Tasks/FillPostMetadataTask.php @@ -176,8 +176,6 @@ private static function setPostImage($post) continue; } - - try { $main_image_url = $serp_url_research->main_image; @@ -215,7 +213,7 @@ private static function setPostImage($post) $image->destroy(); break; - + } catch (Exception $e) { continue; } diff --git a/app/Jobs/Tasks/ParseDFSNewsTask.php b/app/Jobs/Tasks/ParseDFSNewsTask.php index 459e9bf..74056a2 100644 --- a/app/Jobs/Tasks/ParseDFSNewsTask.php +++ b/app/Jobs/Tasks/ParseDFSNewsTask.php @@ -96,9 +96,8 @@ public static function handle(NewsSerpResult $news_serp_result, $serp_counts = 1 continue; } - if ($serp_url->picked == true) - { - continue; + if ($serp_url->picked == true) { + continue; } } diff --git a/app/Jobs/Tasks/ParseRssPostMetadataTask.php b/app/Jobs/Tasks/ParseRssPostMetadataTask.php new file mode 100644 index 0000000..d599213 --- /dev/null +++ b/app/Jobs/Tasks/ParseRssPostMetadataTask.php @@ -0,0 +1,117 @@ +status, ['blocked', 'trashed'])) { + return; + } + + if (! is_null($rss_post->metadata)) { + $post_meta_response = $rss_post->metadata; + } else { + $post_meta_response = OpenAI::getRssPostMeta($rss_post->body, 1536, 30); + + if ((isset($post_meta_response->output)) && (! is_null($post_meta_response->output))) { + $service_cost_usage = new ServiceCostUsage; + $service_cost_usage->cost = $post_meta_response->cost; + $service_cost_usage->name = 'openai-getRssPostMeta'; + $service_cost_usage->reference_1 = 'rss_post'; + $service_cost_usage->reference_2 = strval($rss_post->id); + $service_cost_usage->output = $post_meta_response; + $service_cost_usage->save(); + } + } + + $words_to_add_in_body = []; + + if ((isset($post_meta_response->output)) && (! is_null($post_meta_response->output))) { + + $rss_post->metadata = $post_meta_response; + + if (isset($post_meta_response->output->title)) { + if (! is_empty($post_meta_response->output->title)) { + $rss_post->title = $post_meta_response->output->title; + $rss_post->slug = ($post_meta_response->output->title); + } + } + + if (isset($post_meta_response->output->keywords)) { + if (count($post_meta_response->output->keywords) > 0) { + $rss_post->keywords = $post_meta_response->output->keywords; + + foreach ($post_meta_response->output->keywords as $word) + { + $words_to_add_in_body[] = $word; + } + } + } + + if (isset($post_meta_response->output->entities)) { + if (count($post_meta_response->output->entities) > 0) { + $rss_post->entities = $post_meta_response->output->entities; + + foreach ($post_meta_response->output->entities as $word) + { + $words_to_add_in_body[] = $word; + } + } + } + + if (isset($post_meta_response->output->summary)) { + if (! is_empty($post_meta_response->output->summary)) { + $rss_post->bites = $post_meta_response->output->summary; + } + } + + if (isset($post_meta_response->output->society_impact)) { + if (! is_empty($post_meta_response->output->society_impact)) { + $rss_post->impact = $post_meta_response->output->society_impact; + } + } + + if (isset($post_meta_response->output->society_impact_level)) { + if (! is_empty($post_meta_response->output->society_impact_level)) { + $rss_post->impact_level = $post_meta_response->output->society_impact_level; + } + } + + // Category + $category_name = 'Updates'; + + if ((isset($post_meta_response->output->category)) && (! is_empty($post_meta_response->output->category))) { + $category_name = $post_meta_response?->output?->category; + } + + $category = Category::where('name', $category_name)->first(); + + if (is_null($category)) { + $category = Category::where('name', 'Updates')->first(); + } + + $rss_post->category_id = $category->id; + } + + $post_body = $rss_post->body; + $post_body .= implode($words_to_add_in_body); + $rss_post->body = $post_body; + + $rss_post->status = 'published'; + $rss_post->save(); + + } +} diff --git a/app/Jobs/Tasks/PublishIndexPostTask.php b/app/Jobs/Tasks/PublishIndexPostTask.php index 413fe0c..9ac1172 100644 --- a/app/Jobs/Tasks/PublishIndexPostTask.php +++ b/app/Jobs/Tasks/PublishIndexPostTask.php @@ -3,11 +3,11 @@ namespace App\Jobs\Tasks; use App\Models\Post; +use App\Notifications\PostWasPublished; use Exception; +use Illuminate\Support\Facades\Notification; use LaravelFreelancerNL\LaravelIndexNow\Facades\IndexNow; use LaravelGoogleIndexing; -use Illuminate\Support\Facades\Notification; -use App\Notifications\PostWasPublished; class PublishIndexPostTask { @@ -37,12 +37,7 @@ public static function handle(int $post_id) } - Notification::route('facebook','default')->notify(new PostWasPublished($post)); - - - - - + Notification::route('facebook', 'default')->notify(new PostWasPublished($post)); } } diff --git a/app/Models/RssPost.php b/app/Models/RssPost.php new file mode 100644 index 0000000..01f82cf --- /dev/null +++ b/app/Models/RssPost.php @@ -0,0 +1,69 @@ + 'int', + 'published_at' => 'datetime', + 'metadata' => 'object', + 'keywords' => 'array', + 'entities' => 'array', + ]; + + protected $fillable = [ + 'category_id', + 'source', + 'source_url', + 'post_url', + 'title', + 'slug', + 'body', + 'keywords', + 'entities', + 'metadata', + 'bites', + 'impact', + 'impact_level', + 'published_at', + 'status', + ]; + + public function category() + { + return $this->belongsTo(Category::class); + } +} diff --git a/app/Notifications/PostWasPublished.php b/app/Notifications/PostWasPublished.php index 1a16e0b..a839a17 100644 --- a/app/Notifications/PostWasPublished.php +++ b/app/Notifications/PostWasPublished.php @@ -3,10 +3,7 @@ namespace App\Notifications; use Illuminate\Bus\Queueable; -use Illuminate\Contracts\Queue\ShouldQueue; -use Illuminate\Notifications\Messages\MailMessage; use Illuminate\Notifications\Notification; - use NotificationChannels\FacebookPoster\FacebookPosterChannel; use NotificationChannels\FacebookPoster\FacebookPosterPost; @@ -34,8 +31,8 @@ public function via(object $notifiable): array return [FacebookPosterChannel::class]; } - - public function toFacebookPoster($notifiable) { + public function toFacebookPoster($notifiable) + { return (new FacebookPosterPost(str_first_sentence($this->post->bites)))->withLink(route('front.post', ['slug' => $this->post->slug, 'category_slug' => $this->post->category->slug])); } } diff --git a/composer.json b/composer.json index 599f0c6..aa2a20e 100644 --- a/composer.json +++ b/composer.json @@ -37,6 +37,7 @@ "symfony/dom-crawler": "^6.3", "tightenco/ziggy": "^1.6", "torann/json-ld": "^0.0.19", + "vedmant/laravel-feed-reader": "^1.6", "watson/active": "^7.0" }, "require-dev": { diff --git a/composer.lock b/composer.lock index 4120cc5..8e4351c 100644 --- a/composer.lock +++ b/composer.lock @@ -4,7 +4,7 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", "This file is @generated automatically" ], - "content-hash": "776af1e07123b90c65fa82b84aec0043", + "content-hash": "7419246584579187134315a95c74fa42", "packages": [ { "name": "artesaos/seotools", @@ -5744,6 +5744,82 @@ }, "time": "2020-08-20T12:19:16+00:00" }, + { + "name": "simplepie/simplepie", + "version": "1.8.0", + "source": { + "type": "git", + "url": "https://github.com/simplepie/simplepie.git", + "reference": "65b095d87bc00898d8fa7737bdbcda93a3fbcc55" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/simplepie/simplepie/zipball/65b095d87bc00898d8fa7737bdbcda93a3fbcc55", + "reference": "65b095d87bc00898d8fa7737bdbcda93a3fbcc55", + "shasum": "" + }, + "require": { + "ext-pcre": "*", + "ext-xml": "*", + "ext-xmlreader": "*", + "php": ">=7.2.0" + }, + "require-dev": { + "friendsofphp/php-cs-fixer": "^2.19 || ^3.8", + "psr/simple-cache": "^1 || ^2 || ^3", + "yoast/phpunit-polyfills": "^1.0.1" + }, + "suggest": { + "ext-curl": "", + "ext-iconv": "", + "ext-intl": "", + "ext-mbstring": "", + "mf2/mf2": "Microformat module that allows for parsing HTML for microformats" + }, + "type": "library", + "autoload": { + "psr-0": { + "SimplePie": "library" + }, + "psr-4": { + "SimplePie\\": "src" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "BSD-3-Clause" + ], + "authors": [ + { + "name": "Ryan Parman", + "homepage": "http://ryanparman.com/", + "role": "Creator, alumnus developer" + }, + { + "name": "Sam Sneddon", + "homepage": "https://gsnedders.com/", + "role": "Alumnus developer" + }, + { + "name": "Ryan McCue", + "email": "me@ryanmccue.info", + "homepage": "http://ryanmccue.info/", + "role": "Developer" + } + ], + "description": "A simple Atom/RSS parsing library for PHP", + "homepage": "http://simplepie.org/", + "keywords": [ + "atom", + "feeds", + "rss" + ], + "support": { + "issues": "https://github.com/simplepie/simplepie/issues", + "source": "https://github.com/simplepie/simplepie/tree/1.8.0" + }, + "time": "2023-01-20T08:37:35+00:00" + }, { "name": "spatie/browsershot", "version": "3.60.0", @@ -9204,6 +9280,67 @@ }, "time": "2020-03-10T17:25:19+00:00" }, + { + "name": "vedmant/laravel-feed-reader", + "version": "1.6.0", + "source": { + "type": "git", + "url": "https://github.com/vedmant/laravel-feed-reader.git", + "reference": "0839e0bfc3b86675616b6bdff28bc0533a073145" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/vedmant/laravel-feed-reader/zipball/0839e0bfc3b86675616b6bdff28bc0533a073145", + "reference": "0839e0bfc3b86675616b6bdff28bc0533a073145", + "shasum": "" + }, + "require": { + "ext-curl": "*", + "illuminate/support": "~5.5.0|~5.6.0|~5.7.0|~5.8.0|^6.0|^7.0|^8.0|^9.0|^10.0", + "php": ">=5.6.0", + "simplepie/simplepie": "^1.8.0" + }, + "require-dev": { + "orchestra/testbench": "^7.0", + "phpunit/phpunit": "^9.5" + }, + "type": "library", + "extra": { + "laravel": { + "providers": [ + "Vedmant\\FeedReader\\FeedReaderServiceProvider" + ], + "aliases": { + "FeedReader": "Vedmant\\FeedReader\\Facades\\FeedReader" + } + } + }, + "autoload": { + "psr-4": { + "Vedmant\\FeedReader\\": "src/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Vedmant", + "email": "vedmant@gmail.com" + }, + { + "name": "Andrew Judd", + "email": "contact@andrewjudd.ca" + } + ], + "description": "A simple RSS feed reader for Laravel", + "support": { + "issues": "https://github.com/vedmant/laravel-feed-reader/issues", + "source": "https://github.com/vedmant/laravel-feed-reader/tree/1.6.0" + }, + "time": "2023-07-10T08:51:53+00:00" + }, { "name": "vlucas/phpdotenv", "version": "v5.6.0", diff --git a/config/feed-reader.php b/config/feed-reader.php new file mode 100644 index 0000000..3123acd --- /dev/null +++ b/config/feed-reader.php @@ -0,0 +1,71 @@ + [ + + /** + * The default configuration information + * + * @var array + */ + 'default' => [ + + /** + * All the cache settings + * + * @var array + */ + 'cache' => [ + + /** + * How long the cache is maintained in seconds + * + * @var int + */ + 'duration' => 3600, + + /** + * Whether caching is enabled. + * + * @var bool + */ + 'enabled' => true, + + /** + * The laravel cache driver used for caching + * + * @var string + */ + 'driver' => env('CACHE_DRIVER', 'file'), + ], + + /** + * Whether to force the data feed to be treated as an + * RSS feed. + * + * @var bool + */ + 'force-feed' => false, + + /** + * Whether the RSS feed should have its output ordered by date. + * + * @var bool + */ + 'order-by-date' => false, + + /** + * Whether it should verify SSL, set false to make it work with self-signed certificates + * + * @var bool + */ + 'ssl-verify' => true, + ], + ], +]; diff --git a/config/platform/global.php b/config/platform/global.php index 615169a..dbdde88 100644 --- a/config/platform/global.php +++ b/config/platform/global.php @@ -10,7 +10,96 @@ 'https://u.today', ], + 'rss' => [ + 'http://news.ycombinator.com/rss', + // 'http://blog.samaltman.com/posts.atom', + // 'http://andrewchen.co/feed/', + 'http://feeds.slashgear.com/slashgear', + 'http://venturebeat.com/feed/', + 'http://www.theverge.com/rss/full.xml', + 'http://www.engadget.com/rss-full.xml', + 'https://feeds2.feedburner.com/PennOlson', + 'https://techcrunch.com/feed/', + 'http://www.fastcodesign.com/rss.xml', + 'https://techcrunch.com/startups/feed/', + 'http://www.forbes.com/entrepreneurs/index.xml', + 'http://www.producthunt.com/feed', + 'http://hnrss.org/show', + 'https://hnrss.org/launches', + 'https://hnrss.org/newest?q=ai', + 'https://hnrss.org/newest?q=startup', + 'https://hnrss.org/newest?q=launch', + 'https://hnrss.org/newest?q=model', + 'https://hnrss.org/newest?q=stable%20diffusion', + 'https://hnrss.org/newest?q=midjourney', + 'http://feeds.feedburner.com/sachinrekhiblog', + 'https://deepmind.com/blog/feed/basic/', + 'https://news.mit.edu/rss/topic/artificial-intelligence2', + 'http://bair.berkeley.edu/blog/feed.xml', + 'https://blog.openai.com/rss/', + 'http://googleresearch.blogspot.com/atom.xml', + 'https://www.artificial-intelligence.blog/ai-news?format=rss', + 'https://www.marktechpost.com/feed/', + 'https://wgmimedia.com/feed/', + 'https://dailyai.com/feed/', + 'https://www.sciencedaily.com/rss/computers_math/artificial_intelligence.xml', + ], + + 'whitelist_keywords_rss' => [ + 'ai', + 'gpt', + 'artificial intelligence', + 'openai', + 'cure', + 'llm', + 'llms', + 'mistral', + 'llama', + 'huggingface', + 'sam altman', + 'microsoft', + 'vulkan', + 'facial recognition', + 'deep learning', + 'machine learning', + 'dall-e', + 'stable diffusion', + 'sdxl', + 'sd', + 'midjourney', + 'ai-driven', + 'neural network', + 'training machine', + 'computing', + 'mimic', + 'mimics', + 'breakthrough', + 'groundbreaking', + 'recognition', + 'implants', + 'robot', + 'robots', + 'chatgpt', + 'model', + 'chatbot', + 'innovation', + 'gpt-4', + 'gpt-3', + 'gpt-2', + '3d', + 'vector database', + 'embedding', + 'embeddings', + 'vr', + 'tech', + ], + + 'blacklist_keywords_rss' => [ + ], + 'blacklist_keywords_serp' => [ + 'defense', + 'war', 'government', 'usa', 'china', @@ -24,12 +113,8 @@ 'politic', 'contract', 'negotiat', - 'complete', 'gun', - 'safety', 'wrest', - 'control', - 'opinion', 'cop', 'race', 'porn', @@ -62,8 +147,6 @@ 'agenc', 'presiden', 'avoid', - 'study', - 'expert', 'agreement', 'protection', 'survey', @@ -89,7 +172,6 @@ 'concern', 'ethic', 'underage', - 'guide', ], diff --git a/database/migrations/2023_09_22_154137_create_serp_urls_table.php b/database/migrations/2023_09_22_154137_create_serp_urls_table.php index 1a2e73f..9a62f48 100644 --- a/database/migrations/2023_09_22_154137_create_serp_urls_table.php +++ b/database/migrations/2023_09_22_154137_create_serp_urls_table.php @@ -17,6 +17,7 @@ public function up(): void $table->foreignId('category_id')->nullable(); $table->string('category_name')->nullable(); $table->string('source')->default('serp'); + $table->string('source_url')->nullable(); $table->string('url'); $table->string('country_iso'); $table->string('title')->nullable(); diff --git a/database/migrations/2023_11_19_121001_create_posts_table.php b/database/migrations/2023_11_19_121001_create_posts_table.php index de6cd3a..89c5bbf 100644 --- a/database/migrations/2023_11_19_121001_create_posts_table.php +++ b/database/migrations/2023_11_19_121001_create_posts_table.php @@ -21,6 +21,7 @@ public function up(): void $table->mediumText('bites')->nullable(); $table->mediumText('society_impact')->nullable(); $table->enum('society_impact_level', ['low', 'medium', 'high'])->default('low'); + $table->string('image_ref_url')->nullable(); $table->foreignId('author_id')->nullable(); $table->mediumText('featured_image')->nullable(); $table->text('body')->nullable(); diff --git a/database/migrations/2023_11_20_020253_add_image_ref_url_to_posts_table.php b/database/migrations/2023_11_20_020253_add_image_ref_url_to_posts_table.php deleted file mode 100644 index 406e569..0000000 --- a/database/migrations/2023_11_20_020253_add_image_ref_url_to_posts_table.php +++ /dev/null @@ -1,28 +0,0 @@ -string('image_ref_url')->nullable(); - }); - } - - /** - * Reverse the migrations. - */ - public function down(): void - { - Schema::table('posts', function (Blueprint $table) { - $table->dropColumn(('image_ref_url')); - }); - } -}; diff --git a/database/migrations/2023_11_21_073326_create_rss_posts_table.php b/database/migrations/2023_11_21_073326_create_rss_posts_table.php new file mode 100644 index 0000000..6e76274 --- /dev/null +++ b/database/migrations/2023_11_21_073326_create_rss_posts_table.php @@ -0,0 +1,56 @@ +id(); + $table->foreignId('category_id')->nullable(); + $table->string('source'); + $table->string('source_url'); + $table->string('post_url'); + $table->string('title'); + $table->string('slug'); + $table->text('body')->nullable(); + $table->jsonb('keywords')->nullable(); + $table->jsonb('entities')->nullable(); + $table->json('metadata')->nullable(); + $table->mediumText('bites')->nullable(); + $table->mediumText('impact')->nullable(); + $table->enum('impact_level', ['low', 'medium', 'high'])->default('low'); + $table->datetime('published_at'); + $table->enum('status', ['draft', 'published', 'blocked', 'trashed'])->default('draft'); + $table->timestamps(); + + $table->index('title'); + $table->index('slug'); + $table->index('published_at'); + + $table->foreign('category_id')->references('id')->on('categories'); + }); + + DB::statement('CREATE INDEX idx_rss_posts_entities ON rss_posts USING gin (entities jsonb_path_ops)'); + + + DB::statement('CREATE INDEX idx_rss_posts_keywords ON rss_posts USING gin (keywords jsonb_path_ops)'); + + } + + /** + * Reverse the migrations. + */ + public function down(): void + { + Schema::dropIfExists('rss_posts'); + } +}; diff --git a/resources/sass/app-front.scss b/resources/sass/app-front.scss index 79a6d19..43b11c3 100644 --- a/resources/sass/app-front.scss +++ b/resources/sass/app-front.scss @@ -1,4 +1,4 @@ -@import url(https://fonts.bunny.net/css?family=noto-serif:300,400,700,700i|roboto-condensed:700); +@import url(https://fonts.bunny.net/css?family=noto-serif:300,400,700,700i|roboto-condensed:300,400,500,600,700); @import "variables"; diff --git a/resources/views/front/partials/about.blade.php b/resources/views/front/partials/about.blade.php index 411e0a8..c6d3bfd 100644 --- a/resources/views/front/partials/about.blade.php +++ b/resources/views/front/partials/about.blade.php @@ -1,9 +1,6 @@ diff --git a/resources/views/front/partials/news_bites.blade.php b/resources/views/front/partials/news_bites.blade.php new file mode 100644 index 0000000..6ca9516 --- /dev/null +++ b/resources/views/front/partials/news_bites.blade.php @@ -0,0 +1,86 @@ +
+ + @foreach ($rss_posts as $key => $post) + +
+

+ +

+
+
+
{{ $post->bites }}
+ + @if (in_array($post->impact_level, ['medium','high'])) +
+
+
Social Impact:
+
{{ $post->impact }}
+
+
+ @endif + @if ($post->entities) +
+ More about: @foreach( $post->keywords as $keyword) {{ $keyword }} @endforeach +
+ @endif + +
+
+
+ + @endforeach + + +
\ No newline at end of file diff --git a/resources/views/front/post_list.blade.php b/resources/views/front/post_list.blade.php index cf66614..f948395 100644 --- a/resources/views/front/post_list.blade.php +++ b/resources/views/front/post_list.blade.php @@ -14,9 +14,9 @@ @endif @else @if (isset($category) && !is_null($category)) - {{ $category->name }} News from FutureWalker + {{ $category->name }} News Bites from FutureWalker @else - AI & Tech News from FutureWalker + AI & Tech News Bites from FutureWalker @endif @endif diff --git a/resources/views/front/rss_post_list.blade.php b/resources/views/front/rss_post_list.blade.php new file mode 100644 index 0000000..5bcf232 --- /dev/null +++ b/resources/views/front/rss_post_list.blade.php @@ -0,0 +1,55 @@ +@extends('front.layouts.app') +@section('content') +
+ + @include('front.partials.breadcrumbs') + +
+
+

+ + @if ($page_type == 'search') + @if (isset($title)) + {{ $title }} + @endif + @else + @if (isset($category) && !is_null($category)) + Latest {{ $category->name }} from FutureWalker + @else + AI & Tech News from FutureWalker + @endif + @endif +

+ + @include('front.partials.news_bites') + +
+ @if ($rss_posts->count() > 0) + @if ($rss_posts instanceof \Illuminate\Pagination\CursorPaginator) +
+ {{ $rss_posts->links('pagination::simple-bootstrap-5-rounded') }} +
+ @endif + @else +
+
No 🍪 bites found yet.
+ +
+ @endif +
+ + +
+ +
+
+ @include('front.partials.sidebar') +
+
+
+
+@endsection + +@push('top_head') + {!! $breadcrumb_context !!} +@endpush diff --git a/resources/views/front/welcome.blade.php b/resources/views/front/welcome.blade.php index 107540a..356750f 100644 --- a/resources/views/front/welcome.blade.php +++ b/resources/views/front/welcome.blade.php @@ -8,7 +8,7 @@
Your future depends on today's news.

In the fast-evolving world of AI and tech, staying updated is not - optional—it's critical for your future success. Stay updated with daily news from + optional—it's critical for your future success. Stay updated with daily news 🍪 bites from FutureWalker.

Start reading @@ -18,7 +18,7 @@ - @if ($featured_posts->count() > 0) +{{-- @if ($featured_posts->count() > 0)

Featured News

@@ -50,16 +50,34 @@ class="text-secondary">{{ $post->published_at->diffForHumans() }} @endforeach
- @endif - - @if ($latest_posts->count() > 0) -
+ @endif --}} -
-
+
+ +
+
+ + @if ($rss_posts->count() > 0) +
+

🍪 Tech Bites

+
1-2 minute news bites worth every second
+
+ + + @include('front.partials.news_bites') + +
+ + + @endif + +{{-- @if ($latest_posts->count() > 0)
-

Latest News

+

Latest News

@foreach ($latest_posts as $post) @@ -68,7 +86,7 @@ class="text-secondary">{{ $post->published_at->diffForHumans() }}
- +
@@ -85,16 +103,20 @@ class="fw-bold"
@foreach ($post->keywords as $keyword) @if ($keyword == $post->main_keyword) -

{{ $keyword }}

+

+ {{ $keyword }}

@else -

{{ $keyword }}

+

+ {{ $keyword }}

@endif @endforeach
- @if($post->category) - {{ $post->category->name }} + @if ($post->category) + {{ $post->category->name }} @endif @@ -119,21 +141,19 @@ class="d-inline text-secondary small">{{ markdown_min_read($post->body) }}Discover More News
-
-
-
- @include('front.partials.sidebar') + @endif --}} + +
+
+
+ @include('front.partials.sidebar') -
- -
- @endif - +
@endsection diff --git a/routes/tests.php b/routes/tests.php index 99c045b..aac3cbf 100644 --- a/routes/tests.php +++ b/routes/tests.php @@ -3,6 +3,7 @@ use App\Helpers\FirstParty\OpenAI\OpenAI; use App\Jobs\AISerpGenArticleJob; use App\Jobs\BrowseAndWriteWithAIJob; +use App\Jobs\BrowseRSSPostJob; use App\Jobs\FillPostMetadataJob; use App\Jobs\GenerateArticleFeaturedImageJob; use App\Jobs\GenerateArticleJob; @@ -34,11 +35,24 @@ | */ +Route::get('/opml', [App\Http\Controllers\Tests\TestController::class, 'opml']); + Route::get('/notification', [App\Http\Controllers\Tests\TestController::class, 'notification']); - Route::get('/image_gen', [App\Http\Controllers\Tests\TestController::class, 'imageGen']); +Route::get('/fire_rss', function (Request $request) { + + $hours = $request->input('hours'); + + if (is_empty($hours)) + { + return "Missing 'hours'."; + } + + BrowseRSSPostJob::dispatch($hours)->onQueue('default')->onConnection('default'); +}); + Route::get('/incomplete/post', function (Request $request) { $post = Post::find(1); diff --git a/routes/web.php b/routes/web.php index 5a53523..d4711b2 100644 --- a/routes/web.php +++ b/routes/web.php @@ -27,6 +27,8 @@ Route::post('/search', [App\Http\Controllers\Front\FrontListController::class, 'search'])->name('front.search'); +Route::get('/search/{query}', [App\Http\Controllers\Front\FrontListController::class, 'searchResults'])->name('front.search.results'); + Route::get('/{category_slug}/{slug}', [App\Http\Controllers\Front\FrontPostController::class, 'index'])->name('front.post'); Route::get('/{category_slug}', [App\Http\Controllers\Front\FrontListController::class, 'category'])