This commit is contained in:
2023-11-28 04:39:36 +08:00
parent a9ac0e48b3
commit dc37274b6c
86 changed files with 2106 additions and 191 deletions

View File

@@ -23,25 +23,29 @@ public static function handle(int $url_to_crawl_id)
return null;
}
$enable_proxy = false;
$url_to_crawl->is_crawling = true;
$url_to_crawl->save();
$url_to_crawl->refresh();
try {
$user_agent = config('platform.proxy.user_agent');
// try {
$user_agent = config('platform.proxy.user_agent');
$response = Http::withHeaders([
'User-Agent' => $user_agent,
$response = Http::withHeaders([
'User-Agent' => $user_agent,
])
->withOptions([
'proxy' => ($enable_proxy) ? get_smartproxy_rotating_server() : null,
'timeout' => 10,
'verify' => false,
])
->withOptions([
'proxy' => get_smartproxy_rotating_server(),
'timeout' => 10,
'verify' => false,
])
->get($url_to_crawl->url);
->get($url_to_crawl->url);
if ($response->successful()) {
$raw_html = $response->body();
if ($response->successful()) {
$raw_html = $response->body();
if ($enable_proxy)
{
$cost = calculate_smartproxy_cost(round(strlen($raw_html) / 1024, 2), 'rotating_global');
$service_cost_usage = new ServiceCostUsage;
@@ -51,17 +55,19 @@ public static function handle(int $url_to_crawl_id)
$service_cost_usage->reference_2 = strval($url_to_crawl_id);
$service_cost_usage->output = self::getMarkdownFromHtml($raw_html);
$service_cost_usage->save();
} else {
$raw_html = null;
$response->throw();
}
} catch (Exception $e) {
} else {
$raw_html = null;
//throw $e;
$response->throw();
}
// } catch (Exception $e) {
// $raw_html = null;
// //throw $e;
// }
if (! is_empty($raw_html)) {
$url_to_crawl->output_type = 'markdown';
$url_to_crawl->output = self::getMarkdownFromHtml($raw_html);