Add (news bites)

This commit is contained in:
2023-11-21 19:18:11 +08:00
parent 2e38a4120c
commit 3210563e42
37 changed files with 1581 additions and 153 deletions

View File

@@ -2,7 +2,8 @@
namespace App\Console;
use App\Jobs\BrowseAndWriteWithAIJob;
use App\Jobs\BrowseDFSAndWriteWithAIJob;
use App\Jobs\BrowseRSSPostJob;
use App\Jobs\PublishIndexPostJob;
use App\Models\Post;
use Illuminate\Console\Scheduling\Schedule;
@@ -15,20 +16,24 @@ class Kernel extends ConsoleKernel
*/
protected function schedule(Schedule $schedule): void
{
$schedule->command('sitemap:generate')->daily()->name('sitemap-generate-daily');
$schedule->command('sitemap:generate')->everySixHours()->name('sitemap-generate-every-six-hours');
$schedule->call(function () {
BrowseAndWriteWithAIJob::dispatch()->onQueue('default')->onConnection('default');
})->everySixHours()->name('write-a-job-6hrs');
BrowseRSSPostJob::dispatch(1)->onQueue('default')->onConnection('default');
})->hourly()->name('browse-rss-post-job-hourly');
$schedule->call(function () {
$future_post = Post::whereNotNull('published_at')->where('status', 'future')->where('published_at', '<=', now())->orderBy('published_at', 'ASC')->first();
// $schedule->call(function () {
// BrowseDFSAndWriteWithAIJob::dispatch()->onQueue('default')->onConnection('default');
// })->everySixHours()->name('write-a-job-6hrs');
if (! is_null($future_post)) {
PublishIndexPostJob::dispatch($future_post->id)->onQueue('default')->onConnection('default');
}
// $schedule->call(function () {
// $future_post = Post::whereNotNull('published_at')->where('status', 'future')->where('published_at', '<=', now())->orderBy('published_at', 'ASC')->first();
})->everyMinute()->name('schedule-future-post');
// if (! is_null($future_post)) {
// PublishIndexPostJob::dispatch($future_post->id)->onQueue('default')->onConnection('default');
// }
// })->everyMinute()->name('schedule-future-post');
}

View File

@@ -8,6 +8,16 @@
class OpenAI
{
/**
 * Ask the chat model to extract structured metadata (title, keywords, category,
 * summary, entities, society impact) from an article.
 *
 * The system prompt embeds a JSON template the model is asked to fill in. The
 * template must itself be well-formed, otherwise the model tends to mirror the
 * broken quoting back in its answer.
 *
 * @param  string  $user_prompt  The article text sent as the user message.
 * @param  int  $model_max_tokens  Token limit forwarded to getChatCompletion().
 * @param  int  $timeout  Timeout forwarded to getChatCompletion().
 * @return mixed Whatever self::getChatCompletion() returns (not visible from here).
 */
public static function getRssPostMeta($user_prompt, $model_max_tokens = 1536, $timeout = 60)
{
    $openai_config = 'openai-gpt-4-turbo';

    // One template line per array entry; joined with "\n" exactly like the original single string.
    $system_prompt = implode("\n", [
        'Based on given article, populate the following in valid JSON format',
        '{',
        '"title":"(Title based on article)",',
        '"keywords":["(Important keywords in 1-2 words per keyword)"],',
        '"category":"(Updates|Opinions|Features|New Launches|How Tos|Reviews)",',
        '"summary":"(Summarise article in 60-90 words to help readers understand what article is about)",',
        // Placeholder is now quoted like the "keywords" one, and "entites" is spelled correctly.
        '"entities":["(List of companies, brands that are considered as main entities in 1-2 words per entity)"],',
        '"society_impact":"(Explain in 30-50 words how this article content\'s can impact society on technological aspect)",',
        // Fixed: the closing quote of the key used to sit after the colon ("society_impact_level:").
        '"society_impact_level":"(low|medium|high)"',
        '}',
    ]);

    return self::getChatCompletion($user_prompt, $system_prompt, $openai_config, $model_max_tokens, $timeout);
}
public static function getArticleMeta($user_prompt, $model_max_tokens = 1536, $timeout = 60)
{

View File

@@ -0,0 +1,315 @@
<?php
/**
* A PHP-based OPML (Outline Processor Markup Language) Parser Class. Extracts the properties of content from OPML files
*
* @author Ivan Melgrati
*
* @version 2.2.0
* https://github.com/imelgrat/opml-parser
*
* Converted to PSR-4
*/
namespace App\Helpers\FirstParty\OpmlParser;
use Iterator;
class OpmlParser implements Iterator
{
    /**
     * XML parser handle created by Parser() for the duration of one parse.
     *
     * @var resource|\XMLParser|null
     */
    protected $parser = null;

    /**
     * Position inside OPML list (used for iterating over OPML results)
     *
     * @var int
     */
    protected $position = 0;

    /**
     * Array containing all parsed items (one associative array per <outline> element)
     *
     * @var array
     */
    protected $opml_contents = [];

    /**
     * String containing the unparsed OPML string of the most recent parse
     *
     * @var string
     */
    protected $unparsed_opml = '';

    /**
     * Outline attributes we wish to extract and their mapping names (only the most common attributes were added, more attributes may be added later).
     *
     * NOTE(review): only the KEYS of this array are used by ParseElementStart(); the mapped
     * names (values) are currently not applied to the parsed items.
     *
     * @var array
     */
    protected $opml_map_vars = [
        'ID' => 'id', // Unique element ID
        'TYPE' => 'type', // Element type (audio, feed, playlist, etc)
        'URL' => 'url', // Location of the item. Depending on the value of the type attribute, this can be either a single audio stream or audio playlist, a remote OPML file containing a playlist of audio items, or a remote OPML file to browse.
        'HTMLURL' => 'html_url', // Top-level link element
        'TEXT' => 'title', // Specifies the title of the item.
        'TITLE' => 'title', // Specifies the title of the item.
        'LANGUAGE' => 'language', // The value of the top-level language element
        'TARGET' => 'link_target', // The target window of the link
        'VERSION' => 'version', // Varies depending on the version of RSS that's being supplied. RSS1 for RSS 1.0; RSS for 0.91, 0.92 or 2.0; scriptingNews for scriptingNews format. There are no known values for Atom feeds, but they certainly could be provided.
        'DESCRIPTION' => 'description', // The top-level description element from the feed.
        'XMLURL' => 'xml_url', // The http address of the feed
        'CREATED' => 'created', // Date-time that the outline node was created
        'IMAGEHREF' => 'imageHref', // A link to an image related to the element (.e.g. a song poster)
        'ICON' => 'icon', // A link to an icon related to the element (.e.g. a radio-station's icon)
        'F' => 'song', // When used in OPML playlists, it's used to specify the song's filename.
        'BITRATE' => 'bitrate', // Used to specify the bitrate of an audio stream, in kbps.
        'MIME' => 'mime', // Enter the MIME type of the stream/file.
        'DURATION' => 'duration', // If the item is not a live radio stream, set duration to the playback duration in seconds to ensure the progress bar is displayed correctly. This is especially helpful for VBR files where our bitrate detection may not work properly.
        'LISTENERS' => 'listeners', // Used to display the number of listeners currently listening to an audio stream.
        'CURRENT_TRACK' => 'current_track', // Used to display the track that was most recently playing on a radio station.
        'GENRE' => 'genre', //The genre of a stream may be specified with this attribute.
        'SOURCE' => 'source', // The source of the audio. This is currently used to describe, for instance, how a concert was recorded.
    ];

    /**
     * Reset the parser handle, the parsed items and the iterator position.
     *
     * This was the PHP 4-style constructor of the original OPML_Parser class. Since the
     * class is now named OpmlParser it is an ordinary method and is NOT invoked on
     * instantiation; the property defaults above provide the same initial state. It is
     * kept so that explicit calls keep working.
     *
     * @return void
     */
    public function OPML_Parser()
    {
        $this->parser = null;
        $this->opml_contents = [];
        $this->position = 0;
    }

    /**
     * Rewind the iterator to the first parsed item.
     */
    public function rewind(): void
    {
        $this->position = 0;
    }

    /**
     * Return the item at the current position.
     *
     * The attribute suppresses the PHP 8.1 "return type should be compatible with
     * Iterator::current(): mixed" deprecation without changing the signature that
     * child classes may already override.
     *
     * @return mixed The current item, or null when the position is past the end
     */
    #[\ReturnTypeWillChange]
    public function current()
    {
        return $this->opml_contents[$this->position] ?? null;
    }

    /**
     * Return the key (numeric position) of the current item.
     *
     * @return int The current position
     */
    #[\ReturnTypeWillChange]
    public function key()
    {
        return $this->position;
    }

    /**
     * Move the iterator to the next entry.
     */
    public function next(): void
    {
        $this->position++;
    }

    /**
     * Check whether an item exists at the current position.
     *
     * @return bool TRUE if the current position is valid (the element exists)
     */
    public function valid(): bool
    {
        return isset($this->opml_contents[$this->position]);
    }

    /**
     * Fetch the raw OPML document from a local file or a URL.
     *
     * Existing local files are read with file_get_contents(): cURL cannot open a bare
     * filesystem path, so previously a local path silently produced no data whenever the
     * curl extension was loaded. Everything else is fetched with cURL when available and
     * with file_get_contents() otherwise.
     *
     * @param  string  $location  The location (file or URL) of the OPML file
     * @param  resource|null  $context  Stream context from `stream_context_create()`; only used by the file_get_contents() paths, cURL ignores it
     * @return string Contents found at $location, or an empty string when nothing could be fetched
     */
    protected function getOPMLFile($location = '', $context = null)
    {
        $location = (string) $location;

        if (is_file($location) || ! extension_loaded('curl')) {
            $contents = file_get_contents($location, false, $context);
        } else {
            $ch = curl_init($location);

            if ($ch === false) {
                return '';
            }

            curl_setopt_array($ch, [
                CURLOPT_RETURNTRANSFER => true, // return web page
                CURLOPT_HEADER => false, // don't return headers
                CURLOPT_FOLLOWLOCATION => true, // follow redirects
                CURLOPT_MAXREDIRS => 10, // stop after 10 redirects
                CURLOPT_ENCODING => '', // handle compressed
                CURLOPT_USERAGENT => 'test', // name of client
                CURLOPT_AUTOREFERER => true, // set referrer on redirect
                CURLOPT_CONNECTTIMEOUT => 120, // time-out on connect
                CURLOPT_TIMEOUT => 120, // time-out on response
            ]);

            $contents = curl_exec($ch);
        }

        // Both fetchers return false on failure; normalise that to the documented string type.
        return is_string($contents) ? $contents : '';
    }

    /**
     * The XML tag-open handler. Collects the known attributes of every <outline> element.
     *
     * Each parsed item is keyed by the attribute name exactly as delivered by the XML
     * parser (upper-case, because case-folding is the parser default), e.g. 'XMLURL'.
     *
     * @param  resource|\XMLParser  $parser  The XML parser calling the handler.
     * @param  string  $tagName  The name of the element (upper-case when case-folding is in effect).
     * @param  array  $attrs  The element's attributes (names are case-folded like element names).
     */
    protected function ParseElementStart($parser, $tagName, $attrs)
    {
        // Only <outline> elements carry feed data.
        if ($tagName !== 'OUTLINE') {
            return;
        }

        $node = [];

        foreach (array_keys($this->opml_map_vars) as $key) {
            if (isset($attrs[$key])) {
                $node[$key] = $attrs[$key];
            }
        }

        $this->opml_contents[] = $node;
    }

    /**
     * The XML tag-close handler (not used in this class but can be overloaded in child classes).
     *
     * @param  resource|\XMLParser  $parser  The XML parser calling the handler.
     * @param  string  $tagName  The name of the element (upper-case when case-folding is in effect).
     */
    protected function ParseElementEnd($parser, $tagName)
    {
        // nothing to do.
    }

    /**
     * The XML char data handler (not used in this class but can be overloaded in child classes).
     *
     * @param  resource|\XMLParser  $parser  The XML parser calling the handler.
     * @param  string  $data  The character data; may be delivered in several pieces per text node.
     */
    protected function ParseElementCharData($parser, $data)
    {
        // nothing to do.
    }

    /**
     * Parse the OPML data; resulting items are appended to $opml_contents.
     *
     * NOTE(review): $opml_contents is not cleared here, so items from successive parses
     * accumulate on the same instance. The return value of xml_parse() is not checked, so
     * malformed input yields whatever was parsed before the error.
     *
     * @param  string  $XMLdata  The OPML/XML document to parse.
     */
    protected function Parser($XMLdata)
    {
        // Reset iterator
        $this->position = 0;

        $this->parser = xml_parser_create();

        // The handlers are passed as [object, method] callables, which already bind them to
        // this instance; the former xml_set_object() call was redundant (and is deprecated
        // as of PHP 8.4), as was taking $this by reference.
        xml_set_element_handler($this->parser, [$this, 'ParseElementStart'], [$this, 'ParseElementEnd']);
        xml_set_character_data_handler($this->parser, [$this, 'ParseElementCharData']);

        xml_parse($this->parser, $XMLdata);
        xml_parser_free($this->parser);
    }

    /**
     * Parse contents from an OPML file or URL.
     *
     * @param  string  $location  The location (file or URL) of the OPML file
     * @param  resource|null  $context  Stream context from `stream_context_create()`, forwarded to getOPMLFile().
     */
    public function ParseLocation($location, $context = null)
    {
        $this->unparsed_opml = trim($this->getOPMLFile($location, $context));
        $this->Parser($this->unparsed_opml);
    }

    /**
     * Parse contents from an OPML string.
     *
     * @param  string  $opml  The unparsed OPML string
     */
    public function ParseOPML($opml)
    {
        $this->unparsed_opml = trim($opml);
        $this->Parser($this->unparsed_opml);
    }

    /**
     * Get the unparsed OPML string of the most recent parse.
     *
     * @return string The unparsed OPML string
     */
    public function getUnparsedOPML()
    {
        return $this->unparsed_opml;
    }

    /**
     * Add (or replace) an OPML attribute in the parser's attribute list.
     *
     * The attribute name is trimmed, whitespace runs become underscores and the result is
     * upper-cased. An explicit mapped name is normalised the same way (also upper-cased);
     * when omitted, the lower-cased attribute name is stored instead.
     *
     * @param  string  $attribute  The new attribute to parse
     * @param  string  $mapped_attribute  The mapped name to store for the attribute
     */
    public function setAttribute($attribute, $mapped_attribute = '')
    {
        $attribute = strtoupper(preg_replace('/\s+/', '_', trim($attribute)));

        if ($mapped_attribute != '') {
            $mapped_attribute = strtoupper(preg_replace('/\s+/', '_', trim($mapped_attribute)));
        } else {
            $mapped_attribute = strtolower($attribute);
        }

        $this->opml_map_vars[$attribute] = $mapped_attribute;
    }

    /**
     * Remove an OPML attribute from the parser's attribute list.
     *
     * @param  string  $attribute  The attribute to remove (normalised like in setAttribute())
     */
    public function unsetAttribute($attribute)
    {
        $attribute = strtoupper(preg_replace('/\s+/', '_', trim($attribute)));
        unset($this->opml_map_vars[$attribute]);
    }
}

View File

@@ -83,6 +83,13 @@ function markdown_min_read($markdown)
}
}
if (! function_exists('min_read')) {
    /**
     * Reading-duration helper for arbitrary text.
     *
     * Runs the input through plain_text() and hands the result to read_duration();
     * the return value is whatever read_duration() yields.
     *
     * @param  string  $string  Text to measure.
     * @return mixed
     */
    function min_read($string)
    {
        $text_only = plain_text($string);

        return read_duration($text_only);
    }
}
if (! function_exists('unslug')) {
function unslug($slug, $delimiter = '-')
{
@@ -150,6 +157,16 @@ function is_empty($value): bool
}
}
if (! function_exists('get_domain_from_url')) {
    /**
     * Extract the host part of a URL.
     *
     * parse_url() returns false for seriously malformed URLs and omits the host for
     * relative or scheme-less input; the previous array access raised a warning in both
     * cases. Those inputs now yield null instead.
     *
     * @param  string  $url  Absolute URL, e.g. "https://www.example.com/path".
     * @return string|null The host ("www.example.com"), or null when the URL has none.
     */
    function get_domain_from_url($url)
    {
        $host = parse_url((string) $url, PHP_URL_HOST);

        return is_string($host) && $host !== '' ? $host : null;
    }
}
if (! function_exists('get_country_name_by_iso')) {
function get_country_name_by_iso($country_iso)
{

View File

@@ -4,6 +4,7 @@
use App\Http\Controllers\Controller;
use App\Models\Post;
use App\Models\RssPost;
use Artesaos\SEOTools\Facades\SEOMeta;
use Artesaos\SEOTools\Facades\SEOTools;
use GrahamCampbell\Markdown\Facades\Markdown;
@@ -18,13 +19,15 @@ public function home(Request $request)
// $query->whereNotIn('id', [$featured_post?->id]);
// })->where('status', 'publish')->orderBy('published_at', 'desc')->limit(5)->get();
$featured_posts = Post::where('status', 'publish')->where('published_at', '<=', now())->orderBy('published_at', 'desc')->limit(3)->get();
// $featured_posts = Post::where('status', 'publish')->where('published_at', '<=', now())->orderBy('published_at', 'desc')->limit(3)->get();
$latest_posts = Post::where(function ($query) use ($featured_posts) {
$query->whereNotIn('id', $featured_posts->pluck('id')->toArray());
})->where('status', 'publish')->where('published_at', '<=', now())->orderBy('published_at', 'desc')->limit(10)->get();
// $latest_posts = Post::where(function ($query) use ($featured_posts) {
// $query->whereNotIn('id', $featured_posts->pluck('id')->toArray());
// })->where('status', 'publish')->where('published_at', '<=', now())->orderBy('published_at', 'desc')->limit(10)->get();
return response(view('front.welcome', compact('featured_posts', 'latest_posts')), 200);
$rss_posts = RssPost::where('status', 'published')->orderBy('published_at', 'desc')->paginate(30);
return response(view('front.welcome', compact('rss_posts')), 200);
}
public function terms(Request $request)

View File

@@ -4,8 +4,7 @@
use App\Http\Controllers\Controller;
use App\Models\Category;
use App\Models\Post;
use App\Models\PostCategory;
use App\Models\RssPost;
use Artesaos\SEOTools\Facades\SEOTools;
use Illuminate\Http\Request;
use JsonLd\Context;
@@ -13,15 +12,24 @@
class FrontListController extends Controller
{
/**
 * Entry point of the search form: turns the submitted "query" input into a
 * redirect to the named route front.search.results.
 *
 * When the input is empty (per the is_empty() helper) the user is sent back to
 * the previous page instead.
 */
public function search(Request $request)
{
    $has_query = ! is_empty($request->input('query'));

    if ($has_query) {
        $results_url = route('front.search.results', ['query' => $request->input('query')]);

        return redirect()->to($results_url);
    }

    return redirect()->back();
}
public function searchResults(Request $request, $query)
{
$page_type = 'search';
$query = $request->get('query', '');
$query = strtolower($query);
$breadcrumbs = collect([
['name' => 'Home', 'url' => route('front.home')],
['name' => 'Search', 'url' => null],
['name' => $query, 'url' => url()->current()],
['name' => 'News Bites', 'url' => route('front.search.results',['query' => $query])],
['name' => $query, 'url' => null],
]);
$title = 'Latest News about '.ucwords($query).' in FutureWalker';
@@ -32,14 +40,13 @@ public function search(Request $request)
SEOTools::jsonLd();
SEOTools::setTitle($title, false);
// Use full-text search capabilities of your database
// For example, using MySQL's full-text search with MATCH...AGAINST
$posts = Post::with('category')
->where('status', 'publish')
->whereRaw("to_tsvector('english', title || ' ' || bites) @@ to_tsquery('english', ?)", [str_replace(' ', ' & ', $query)])
->where('published_at', '<=', now())
->orderBy('published_at', 'desc')
->cursorPaginate(10);
$rss_posts = RssPost::with('category')
->where('status', 'published')
->whereRaw("to_tsvector('english', title || ' ' || bites || ' ' || body) @@ plainto_tsquery('english', ?)", [trim(preg_replace('/\s+/', ' ', $query))])
->where('published_at', '<=', now())
->orderBy('published_at', 'desc')
->cursorPaginate(60);
// breadcrumb json ld
$listItems = [];
@@ -55,7 +62,7 @@ public function search(Request $request)
'itemListElement' => $listItems,
]);
return view('front.post_list', compact('posts', 'breadcrumbs', 'breadcrumb_context', 'title', 'page_type'));
return view('front.rss_post_list', compact('rss_posts', 'breadcrumbs', 'breadcrumb_context', 'title', 'page_type'));
}
public function index(Request $request)
@@ -64,10 +71,10 @@ public function index(Request $request)
$breadcrumbs = collect([
['name' => 'Home', 'url' => route('front.home')],
['name' => 'Latest News', 'url' => null], // or you can set a route for Latest News if there's a specific one
['name' => 'News Bites', 'url' => null], // or you can set a route for Latest News if there's a specific one
]);
$title = 'Latest News from FutureWalker';
$title = 'Latest News Bites from FutureWalker';
SEOTools::metatags();
SEOTools::twitter();
@@ -75,10 +82,10 @@ public function index(Request $request)
SEOTools::jsonLd();
SEOTools::setTitle($title, false);
$posts = Post::with('category')->where('status', 'publish')
$rss_posts = RssPost::with('category')->where('status', 'published')
->where('published_at', '<=', now())
->orderBy('published_at', 'desc')
->cursorPaginate(10) ?? collect();
->cursorPaginate(60) ?? collect();
// breadcrumb json ld
$listItems = [];
@@ -90,52 +97,46 @@ public function index(Request $request)
];
}
//dd($posts);
//dd($rss_posts);
$breadcrumb_context = Context::create('breadcrumb_list', [
'itemListElement' => $listItems,
]);
return view('front.post_list', compact('posts', 'breadcrumbs', 'breadcrumb_context', 'page_type'));
return view('front.rss_post_list', compact('rss_posts', 'breadcrumbs', 'breadcrumb_context', 'page_type'));
}
public function category(Request $request, $category_slug)
{
$page_type = 'default';
$page_type = 'category';
// Fetch the category by slug
$category = Category::where('slug', $category_slug)->first();
// Check if the category exists
if (! $category) {
abort(404, 'Category not found');
if (is_null($category))
{
abort(404);
}
// Breadcrumb logic
$breadcrumbs = collect([['name' => 'Home', 'url' => route('front.home')]]);
foreach ($category->ancestors as $ancestor) {
$breadcrumbs->push(['name' => $ancestor->name, 'url' => route('front.category', $ancestor->slug)]);
}
$breadcrumbs->push(['name' => $category->name, 'url' => route('front.category', $category->slug)]);
// Get the IDs of the category and its descendants
$categoryIds = $category->descendants->pluck('id')->push($category->id);
$breadcrumbs = collect([
['name' => 'Home', 'url' => route('front.home')],
['name' => $category->name, 'url' => null],
]);
// Get the posts associated with these category IDs
$postIds = PostCategory::whereIn('category_id', $categoryIds)->pluck('post_id');
$posts = Post::whereIn('id', $postIds)
->where('published_at', '<=', now())
->where('status', 'publish')
->orderBy('published_at', 'desc')
->cursorPaginate(10);
$title = 'Latest News from FutureWalker';
$title = $category->name.' News from FutureWalker';
SEOTools::metatags();
SEOTools::twitter();
SEOTools::opengraph();
SEOTools::setTitle($title, false);
SEOTools::jsonLd();
SEOTools::setTitle($title, false);
$rss_posts = RssPost::with('category')->where('status', 'published')
->where('category_id', $category->id)
->where('published_at', '<=', now())
->orderBy('published_at', 'desc')
->cursorPaginate(60) ?? collect();
// breadcrumb json ld
$listItems = [];
@@ -147,10 +148,12 @@ public function category(Request $request, $category_slug)
];
}
//dd($rss_posts);
$breadcrumb_context = Context::create('breadcrumb_list', [
'itemListElement' => $listItems,
]);
return view('front.post_list', compact('category', 'posts', 'breadcrumbs', 'breadcrumb_context', 'page_type'));
return view('front.rss_post_list', compact('rss_posts', 'breadcrumbs', 'breadcrumb_context', 'page_type','category'));
}
}

View File

@@ -28,7 +28,7 @@ public function redirect(Request $request, $slug)
public function index(Request $request, $category_slug, $slug)
{
$post = Post::where('slug', $slug)->whereIn('status', ['publish','future'])->first();
$post = Post::where('slug', $slug)->whereIn('status', ['publish', 'future'])->first();
if (is_null($post)) {
return abort(404);

View File

@@ -4,22 +4,31 @@
use App\Helpers\FirstParty\ImageGen\ImageGen;
use App\Http\Controllers\Controller;
use App\Jobs\Tasks\BrowseRSSLatestNewsTask;
use App\Models\Post;
use App\Notifications\PostWasPublished;
use Illuminate\Http\Request;
use Illuminate\Support\Facades\Notification;
use LaravelFreelancerNL\LaravelIndexNow\Facades\IndexNow;
use LaravelGoogleIndexing;
use App\Models\Post;
use Illuminate\Support\Facades\Notification;
use App\Notifications\PostWasPublished;
class TestController extends Controller
{
/**
 * Debug endpoint: reads one hard-coded feed through BrowseRSSLatestNewsTask::handleSingle()
 * with a 240-hour look-back and dumps every returned item.
 */
public function opml(Request $request)
{
    $feed_items = BrowseRSSLatestNewsTask::handleSingle('https://hnrss.org/newest?q=ai', 240);

    foreach ($feed_items as $feed_item) {
        dump($feed_item);
    }
}
public function notification(Request $request)
{
$post = Post::find(1);
$post = Post::find(1);
Notification::route('facebook','default')->notify(new PostWasPublished($post));
Notification::route('facebook', 'default')->notify(new PostWasPublished($post));
}
public function imageGen(Request $request)

View File

@@ -11,7 +11,7 @@
use Illuminate\Queue\InteractsWithQueue;
use Illuminate\Queue\SerializesModels;
class BrowseAndWriteWithAIJob implements ShouldQueue
class BrowseDFSAndWriteWithAIJob implements ShouldQueue
{
use Dispatchable, InteractsWithQueue, Queueable, SerializesModels;

View File

@@ -0,0 +1,38 @@
<?php
namespace App\Jobs;
use Illuminate\Bus\Queueable;
use Illuminate\Contracts\Queue\ShouldQueue;
use Illuminate\Foundation\Bus\Dispatchable;
use Illuminate\Queue\InteractsWithQueue;
use Illuminate\Queue\SerializesModels;
class BrowseRSSPostJob implements ShouldQueue
{
    use Dispatchable, InteractsWithQueue, Queueable, SerializesModels;

    /**
     * Queue timeout for this job, in seconds.
     */
    public $timeout = 20;

    /**
     * Create a new job instance.
     *
     * @param  mixed  $hours  Look-back window handed on to every BrowseSingleRSSJob.
     */
    public function __construct(protected $hours)
    {
    }

    /**
     * Fan out: dispatch one BrowseSingleRSSJob per feed URL listed under the
     * platform.global.rss config key.
     */
    public function handle(): void
    {
        $feed_urls = config('platform.global.rss');

        foreach ($feed_urls as $feed_url) {
            BrowseSingleRSSJob::dispatch($feed_url, $this->hours);
        }
    }
}

View File

@@ -0,0 +1,61 @@
<?php
namespace App\Jobs;
use App\Jobs\Tasks\BrowseRSSLatestNewsTask;
use App\Models\RssPost;
use Illuminate\Bus\Queueable;
use Illuminate\Contracts\Queue\ShouldQueue;
use Illuminate\Foundation\Bus\Dispatchable;
use Illuminate\Queue\InteractsWithQueue;
use Illuminate\Queue\SerializesModels;
class BrowseSingleRSSJob implements ShouldQueue
{
    use Dispatchable, InteractsWithQueue, Queueable, SerializesModels;

    /**
     * Queue timeout for this job, in seconds.
     */
    public $timeout = 20;

    /**
     * Create a new job instance.
     *
     * @param  mixed  $rss_url  Feed URL to read.
     * @param  mixed  $hours  Look-back window forwarded to BrowseRSSLatestNewsTask::handleSingle().
     */
    public function __construct(protected $rss_url, protected $hours)
    {
    }

    /**
     * Read the feed and store every item whose link is not yet known as a draft
     * RssPost, then queue a CrawlRssPostJob for each newly saved post.
     */
    public function handle(): void
    {
        $feed_items = BrowseRSSLatestNewsTask::handleSingle($this->rss_url, $this->hours);

        foreach ($feed_items as $feed_item) {
            $known_post = RssPost::where('post_url', $feed_item->link)->first();

            // Already stored for this link: nothing to do.
            if (! is_null($known_post)) {
                continue;
            }

            $draft = new RssPost;
            $draft->post_url = $feed_item->link;
            $draft->source = $feed_item->source;
            $draft->source_url = $feed_item->source_url;
            $draft->title = remove_newline($feed_item->title);
            $draft->slug = str_slug(remove_newline($feed_item->title));
            $draft->published_at = $feed_item->date;
            $draft->status = 'draft';

            // Only crawl posts that were actually persisted.
            if (! $draft->save()) {
                continue;
            }

            CrawlRssPostJob::dispatch($draft->id)->onConnection('default')->onQueue('default');
        }
    }
}

View File

@@ -0,0 +1,35 @@
<?php
namespace App\Jobs;
use App\Jobs\Tasks\CrawlRssPostTask;
use Illuminate\Bus\Queueable;
use Illuminate\Contracts\Queue\ShouldQueue;
use Illuminate\Foundation\Bus\Dispatchable;
use Illuminate\Queue\InteractsWithQueue;
use Illuminate\Queue\SerializesModels;
class CrawlRssPostJob implements ShouldQueue
{
    use Dispatchable, InteractsWithQueue, Queueable, SerializesModels;

    /**
     * Queue timeout for this job, in seconds.
     */
    public $timeout = 15;

    /**
     * Create a new job instance.
     *
     * @param  mixed  $rss_post_id  Primary key of the RssPost to crawl.
     */
    public function __construct(protected $rss_post_id)
    {
    }

    /**
     * Queue wrapper: all work is delegated to CrawlRssPostTask::handle().
     */
    public function handle(): void
    {
        CrawlRssPostTask::handle($this->rss_post_id);
    }
}

View File

@@ -0,0 +1,35 @@
<?php
namespace App\Jobs;
use App\Jobs\Tasks\ParseRssPostMetadataTask;
use Illuminate\Bus\Queueable;
use Illuminate\Contracts\Queue\ShouldQueue;
use Illuminate\Foundation\Bus\Dispatchable;
use Illuminate\Queue\InteractsWithQueue;
use Illuminate\Queue\SerializesModels;
class ParseRssPostMetadataJob implements ShouldQueue
{
    use Dispatchable, InteractsWithQueue, Queueable, SerializesModels;

    /**
     * Queue timeout for this job, in seconds.
     */
    public $timeout = 240;

    /**
     * Primary key of the RssPost whose metadata should be parsed.
     */
    protected $rss_post_id;

    /**
     * Create a new job instance.
     */
    public function __construct(int $rss_post_id)
    {
        $this->rss_post_id = $rss_post_id;
    }

    /**
     * Queue wrapper: all work is delegated to ParseRssPostMetadataTask::handle().
     */
    public function handle(): void
    {
        $post_id = $this->rss_post_id;

        ParseRssPostMetadataTask::handle($post_id);
    }
}

View File

@@ -0,0 +1,60 @@
<?php
namespace App\Jobs\Tasks;
use Carbon\Carbon;
use Vedmant\FeedReader\Facades\FeedReader;
class BrowseRSSLatestNewsTask
{
public static function handleMulti($hours = 3)
{
$rss_urls = config('platform.global.rss');
$raw_posts = [];
foreach ($rss_urls as $rss_url) {
$this_rss_posts = array_merge(self::handleSingle($rss_url, $hours));
foreach ($this_rss_posts as $item) {
$raw_posts[] = $item;
}
}
return $raw_posts;
}
public static function handleSingle($rss_url, $hours = 3)
{
$f = FeedReader::read($rss_url);
$raw_posts = [];
foreach ($f->get_items() as $item) {
$post_datetime = Carbon::parse($item->get_date(\DateTime::ATOM));
if (! $post_datetime->isBetween(now()->subHours($hours), now())) {
continue;
}
$title = trim($item->get_title());
$description = trim($item->get_content());
$raw_posts[] = (object) [
'source' => $f->get_title(),
'source_url' => $rss_url,
'title' => $title,
'link' => $item->get_link(),
'description' => $description,
'date' => $post_datetime,
'category' => $item->get_category()?->term,
];
}
unset($f);
return $raw_posts;
}
}

View File

@@ -0,0 +1,167 @@
<?php
namespace App\Jobs\Tasks;
use App\Jobs\ParseRssPostMetadataJob;
use App\Models\RssPost;
use Exception;
use Illuminate\Support\Facades\Http;
use League\HTMLToMarkdown\HtmlConverter;
use Symfony\Component\DomCrawler\Crawler;
class CrawlRssPostTask
{
public static function handle(int $rss_post_id)
{
$rss_post = RssPost::find($rss_post_id);
if (is_null($rss_post)) {
return null;
}
try {
$user_agent = config('platform.proxy.user_agent');
$response = Http::withHeaders([
'User-Agent' => $user_agent,
])
->withOptions([
'proxy' => get_smartproxy_rotating_server(),
'timeout' => 10,
'verify' => false,
])
->get($rss_post->post_url);
if ($response->successful()) {
$raw_html = $response->body();
$costs['unblocker'] = calculate_smartproxy_cost(round(strlen($raw_html) / 1024, 2), 'rotating_global');
} else {
$raw_html = null;
$response->throw();
}
} catch (Exception $e) {
$raw_html = null;
}
if (! is_empty($raw_html)) {
$rss_post->body = self::getMarkdownFromHtml($raw_html);
} else {
$rss_post->body = 'EMPTY CONTENT';
}
if ((is_empty($rss_post->body)) || ($rss_post->body == 'EMPTY CONTENT') || (strlen($rss_post->body) < 800)){
$rss_post->status = 'blocked';
}
if ($rss_post->save()) {
if (! in_array($rss_post->status, ['blocked', 'trashed'])) {
ParseRssPostMetadataJob::dispatch($rss_post->id)->onConnection('default')->onQueue('default');
}
}
}
private static function getMarkdownFromHtml($html)
{
$converter = new HtmlConverter([
'strip_tags' => true,
'strip_placeholder_links' => true,
]);
$html = self::cleanHtml($html);
$markdown = $converter->convert($html);
//dd($markdown);
$markdown = self::reverseLTGT($markdown);
$markdown = self::normalizeNewLines($markdown);
$markdown = self::removeDuplicateLines($markdown);
return html_entity_decode(markdown_to_plaintext($markdown));
}
private static function reverseLTGT($input)
{
$output = str_replace('&lt;', '<', $input);
$output = str_replace('&gt;', '>', $output);
return $output;
}
private static function removeDuplicateLines($string)
{
$lines = explode("\n", $string);
$uniqueLines = array_unique($lines);
return implode("\n", $uniqueLines);
}
private static function normalizeNewLines($content)
{
// Split the content by lines
$lines = explode("\n", $content);
$processedLines = [];
for ($i = 0; $i < count($lines); $i++) {
$line = trim($lines[$i]);
// If the line is an image markdown
if (preg_match("/^!\[.*\]\(.*\)$/", $line)) {
// And if the next line is not empty and not another markdown structure
if (isset($lines[$i + 1]) && ! empty(trim($lines[$i + 1])) && ! preg_match('/^[-=#*&_]+$/', trim($lines[$i + 1]))) {
$line .= ' '.trim($lines[$i + 1]);
$i++; // Skip the next line as we're merging it
}
}
// Add line to processedLines if it's not empty
if (! empty($line)) {
$processedLines[] = $line;
}
}
// Collapse excessive newlines
$result = preg_replace("/\n{3,}/", "\n\n", implode("\n", $processedLines));
// Detect and replace the pattern
$result = preg_replace('/^(!\[.*?\]\(.*?\))\s*\n\s*([^\n!]+)/m', '$1 $2', $result);
// Replace multiple spaces with a dash separator
$result = preg_replace('/ {2,}/', ' - ', $result);
return $result;
}
private static function cleanHtml($htmlContent)
{
$crawler = new Crawler($htmlContent);
// Define tags to remove completely
$tagsToRemove = ['script', 'style', 'svg', 'picture', 'form', 'footer', 'nav', 'aside'];
foreach ($tagsToRemove as $tag) {
$crawler->filter($tag)->each(function ($node) {
foreach ($node as $child) {
$child->parentNode->removeChild($child);
}
});
}
// Replace <span> tags with their inner content
$crawler->filter('span')->each(function ($node) {
$replacement = new \DOMText($node->text());
foreach ($node as $child) {
$child->parentNode->replaceChild($replacement, $child);
}
});
return $crawler->outerHtml();
}
}

View File

@@ -176,8 +176,6 @@ private static function setPostImage($post)
continue;
}
try {
$main_image_url = $serp_url_research->main_image;
@@ -215,7 +213,7 @@ private static function setPostImage($post)
$image->destroy();
break;
} catch (Exception $e) {
continue;
}

View File

@@ -96,9 +96,8 @@ public static function handle(NewsSerpResult $news_serp_result, $serp_counts = 1
continue;
}
if ($serp_url->picked == true)
{
continue;
if ($serp_url->picked == true) {
continue;
}
}

View File

@@ -0,0 +1,117 @@
<?php
namespace App\Jobs\Tasks;
use App\Helpers\FirstParty\OpenAI\OpenAI;
use App\Models\Category;
use App\Models\RssPost;
use App\Models\ServiceCostUsage;
class ParseRssPostMetadataTask
{
    /**
     * Enrich an RSS post with AI-generated metadata, then mark it as published.
     *
     * Steps:
     *  1. Load the post; bail out when it does not exist or is blocked/trashed.
     *  2. Reuse the metadata already stored on the post, or request it from
     *     OpenAI::getRssPostMeta() and record the cost in ServiceCostUsage.
     *  3. Copy title/slug, keywords, entities, summary, impact and category
     *     from the metadata output onto the post.
     *  4. Append the keywords and entities to the body, set the status to
     *     'published' and save.
     *
     * NOTE(review): is_empty() is not a PHP built-in; it is presumably a
     * project-level helper — confirm it is autoloaded.
     * NOTE(review): the post is published and the words are appended on every
     * run, so running the task twice on the same post repeats the appended
     * words — confirm callers only run it once per post.
     *
     * @param  int  $rss_post_id  Primary key of the RssPost to process.
     */
    public static function handle(int $rss_post_id): void
    {
        $rss_post = RssPost::find($rss_post_id);

        if (is_null($rss_post)) {
            return;
        }

        if (in_array($rss_post->status, ['blocked', 'trashed'], true)) {
            return;
        }

        if (! is_null($rss_post->metadata)) {
            $post_meta_response = $rss_post->metadata;
        } else {
            $post_meta_response = OpenAI::getRssPostMeta($rss_post->body, 1536, 30);

            // isset() is already false for null, so no extra is_null() check is needed.
            if (isset($post_meta_response->output)) {
                $service_cost_usage = new ServiceCostUsage;
                $service_cost_usage->cost = $post_meta_response->cost;
                $service_cost_usage->name = 'openai-getRssPostMeta';
                $service_cost_usage->reference_1 = 'rss_post';
                $service_cost_usage->reference_2 = strval($rss_post->id);
                $service_cost_usage->output = $post_meta_response;
                $service_cost_usage->save();
            }
        }

        $words_to_add_in_body = [];
        $output = $post_meta_response->output ?? null;

        if (! is_null($output)) {
            $rss_post->metadata = $post_meta_response;

            if (isset($output->title) && ! is_empty($output->title)) {
                $rss_post->title = $output->title;

                // The slug must be URL-safe; the raw title is not. Keep the
                // existing slug if the title cannot be slugified at all.
                $slug = \Illuminate\Support\Str::slug((string) $output->title);
                if ($slug !== '') {
                    $rss_post->slug = $slug;
                }
            }

            // List-valued fields are stored on the post and also collected
            // for appending to the body.
            foreach (['keywords', 'entities'] as $list_field) {
                $values = $output->{$list_field} ?? null;

                // Guard against non-array values: count() on them throws in PHP 8.
                if (is_array($values) && count($values) > 0) {
                    $rss_post->{$list_field} = $values;
                    $words_to_add_in_body = array_merge($words_to_add_in_body, array_values($values));
                }
            }

            // Metadata output field => RssPost attribute.
            $text_fields = [
                'summary' => 'bites',
                'society_impact' => 'impact',
                'society_impact_level' => 'impact_level',
            ];

            foreach ($text_fields as $output_field => $attribute) {
                if (isset($output->{$output_field}) && ! is_empty($output->{$output_field})) {
                    $rss_post->{$attribute} = $output->{$output_field};
                }
            }

            // Category: use the suggested one, falling back to 'Updates'.
            $category_name = 'Updates';

            if (isset($output->category) && ! is_empty($output->category)) {
                $category_name = $output->category;
            }

            $category = Category::where('name', $category_name)->first()
                ?? Category::where('name', 'Updates')->first();

            // Leave category_id untouched when even the fallback category is
            // missing, instead of dereferencing null.
            if (! is_null($category)) {
                $rss_post->category_id = $category->id;
            }
        }

        $post_body = (string) $rss_post->body;

        if (count($words_to_add_in_body) > 0) {
            // Separate the words from the body and from each other so they
            // stay distinct tokens.
            $post_body .= ' '.implode(' ', $words_to_add_in_body);
        }

        $rss_post->body = $post_body;
        $rss_post->status = 'published';
        $rss_post->save();
    }
}

View File

@@ -3,11 +3,11 @@
namespace App\Jobs\Tasks;
use App\Models\Post;
use App\Notifications\PostWasPublished;
use Exception;
use Illuminate\Support\Facades\Notification;
use LaravelFreelancerNL\LaravelIndexNow\Facades\IndexNow;
use LaravelGoogleIndexing;
use Illuminate\Support\Facades\Notification;
use App\Notifications\PostWasPublished;
class PublishIndexPostTask
{
@@ -37,12 +37,7 @@ public static function handle(int $post_id)
}
Notification::route('facebook','default')->notify(new PostWasPublished($post));
Notification::route('facebook', 'default')->notify(new PostWasPublished($post));
}
}

69
app/Models/RssPost.php Normal file
View File

@@ -0,0 +1,69 @@
<?php
/**
* Created by Reliese Model.
*/
namespace App\Models;
use Carbon\Carbon;
use Illuminate\Database\Eloquent\Model;
/**
 * Class RssPost
 *
 * Eloquent model backed by the `rss_posts` table. The property types below
 * reflect the values after the casts declared in $casts are applied.
 *
 * @property int $id
 * @property int|null $category_id
 * @property string $source
 * @property string $source_url
 * @property string $post_url
 * @property string $title
 * @property string $slug
 * @property string|null $body
 * @property array|null $keywords
 * @property array|null $entities
 * @property object|null $metadata
 * @property string|null $bites
 * @property string|null $impact
 * @property string $impact_level
 * @property Carbon $published_at
 * @property string $status
 * @property Carbon|null $created_at
 * @property Carbon|null $updated_at
 * @property Category|null $category
 */
class RssPost extends Model
{
// Database table this model reads from and writes to.
protected $table = 'rss_posts';
// Attribute casts: `metadata` is exposed as an object, `keywords` and
// `entities` as arrays, `published_at` as a datetime instance.
protected $casts = [
'category_id' => 'int',
'published_at' => 'datetime',
'metadata' => 'object',
'keywords' => 'array',
'entities' => 'array',
];
// Attributes that may be set through mass assignment.
protected $fillable = [
'category_id',
'source',
'source_url',
'post_url',
'title',
'slug',
'body',
'keywords',
'entities',
'metadata',
'bites',
'impact',
'impact_level',
'published_at',
'status',
];
/**
 * The category this RSS post belongs to.
 */
public function category()
{
return $this->belongsTo(Category::class);
}
}

View File

@@ -3,10 +3,7 @@
namespace App\Notifications;
use Illuminate\Bus\Queueable;
use Illuminate\Contracts\Queue\ShouldQueue;
use Illuminate\Notifications\Messages\MailMessage;
use Illuminate\Notifications\Notification;
use NotificationChannels\FacebookPoster\FacebookPosterChannel;
use NotificationChannels\FacebookPoster\FacebookPosterPost;
@@ -34,8 +31,8 @@ public function via(object $notifiable): array
return [FacebookPosterChannel::class];
}
public function toFacebookPoster($notifiable) {
public function toFacebookPoster($notifiable)
{
return (new FacebookPosterPost(str_first_sentence($this->post->bites)))->withLink(route('front.post', ['slug' => $this->post->slug, 'category_slug' => $this->post->category->slug]));
}
}