C# 爬虫抓取网页数据(调试环境:ASP.NET Core Web API,目标框架:.NET Core)
优采云 发布时间:2021-09-27 12:17
C# 爬虫抓取网页数据(调试环境:ASP.NET Core Web API,目标框架:.NET Core)
调试环境:ASP.NET Core Web API
目标框架:.NET Core 2.2
开发工具:Visual Studio 2017
作者:成长的小猪(Jason Song)
在创业的过程中,由于缺少基础数据,我们经常需要编写爬虫来抓取相关的网页数据。以前我们在 .NET Framework 上使用 HttpHelper 类来抓取相应的网页,非常方便。现在技术在不断地迭代和升级,跨平台的 .NET Core 非常好用,我们同样需要在 .NET Core 下抓取网页数据。今天,我想介绍一下我最近写的一个小库,希望它也能帮助到你。
JsHttpClient 是一个简单灵活的 .NET Core HTML 页面抓取客户端库
JsHttpClient 是一个客户端库,用于在 .NET Core 上进行简单灵活的 HTML 页面抓取。
安装方法1:工具(Tools)→ NuGet 包管理器(NuGet Package Manager)→ 程序包管理器控制台(Package Manager Console)
在控制台中输入以下命令
PM> Install-Package JsHttpClient
安装方法2:工具(Tools)→ NuGet 包管理器(NuGet Package Manager)→ 管理解决方案的 NuGet 程序包(Manage NuGet Packages for Solution)
快速启动
首先,在 ConfigureServices(IServiceCollection services) 方法中添加 JsHttpClient 客户端服务
// Startup.cs
// 文章来源 http://blog.csdn.net/jasonsong2008
/// <summary>
/// Registers the JsHttpClient client services and MVC on the supplied service collection.
/// </summary>
/// <param name="services">The service collection the registrations are added to.</param>
public void ConfigureServices(IServiceCollection services)
{
    // Register the JsHttpClient client services.
    // Added by Jason.Song on 2019/04/23.
    //
    // Per the original author's note, automatic redirects are allowed by default (true).
    // To turn them off, register with explicit options instead:
    // services.AddJsHttpClient(new JsHttpClientOptions { AllowAutoRedirect = false });
    services.AddJsHttpClient();

    // Register MVC, then pin its compatibility behavior to version 2.2.
    var mvcBuilder = services.AddMvc();
    mvcBuilder.SetCompatibilityVersion(CompatibilityVersion.Version_2_2);
}
举例
using System.Threading.Tasks;
using Microsoft.AspNetCore.Mvc;
using JasonSoft.Net.JsHttpClient.Http;
namespace JsHttpClient.WebApi.Controllers
{
    /// <summary>
    /// Sample controller that fetches a fixed web page through <see cref="IJsHttpClient"/>
    /// and returns the fetched HTML to the caller.
    /// Added by Jason.Song on 2019/04/24.
    /// http://blog.csdn.net/jasonsong2008
    /// </summary>
    [Route("api/[controller]")]
    [ApiController]
    public class TestController : ControllerBase
    {
        // Page requested by both sample actions.
        private const string TargetUrl = "https://blog.csdn.net/jasonsong2008";

        // Content type used when handing the fetched HTML back to the caller.
        private const string HtmlContentType = "text/html; charset=utf-8";

        private readonly IJsHttpClient _client;

        /// <summary>
        /// Creates the controller.
        /// Added by Jason.Song on 2019/04/24.
        /// </summary>
        /// <param name="client">The JsHttpClient instance used to send requests.</param>
        public TestController(IJsHttpClient client)
        {
            _client = client;
        }

        /// <summary>
        /// Asynchronous request test: sends the sample request with
        /// <c>SendAsync</c> and returns the response HTML.
        /// Added by Jason.Song on 2019/04/24.
        /// http://blog.csdn.net/jasonsong2008
        /// </summary>
        /// <returns>A content result carrying the response HTML as <c>text/html; charset=utf-8</c>.</returns>
        [HttpGet("HttpAsync")]
        public async Task<IActionResult> HttpAsync()
        {
            // The return type must be Task<IActionResult>: an "async Task" method
            // cannot return a value, so the original signature did not compile.
            var response = await _client.SendAsync(CreateRequest());
            // The original sample also mentions (commented out):
            //response.Cookie
            //response.ResultByte
            return Content(response.Html, HtmlContentType);
        }

        /// <summary>
        /// Synchronous request test: sends the sample request with
        /// <c>Send</c> and returns the response HTML.
        /// Added by Jason.Song on 2019/04/24.
        /// http://blog.csdn.net/jasonsong2008
        /// </summary>
        /// <returns>A content result carrying the response HTML as <c>text/html; charset=utf-8</c>.</returns>
        [HttpGet("HttpSync")]
        public IActionResult HttpSync()
        {
            var response = _client.Send(CreateRequest());
            // The original sample also mentions (commented out):
            //response.Cookie
            //response.ResultByte
            return Content(response.Html, HtmlContentType);
        }

        /// <summary>
        /// Builds the request shared by both sample actions; only the URI is set.
        /// </summary>
        /// <returns>A new request targeting the sample page.</returns>
        private static JsHttpRequest CreateRequest()
        {
            var request = new JsHttpRequest {Uri = TargetUrl};
            // Optional settings shown, but left disabled, in the original sample:
            //request.Method = HttpMethod.Get;
            //request.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*";
            //request.UserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36";
            //request.Referer = "https://blog.csdn.net/";
            //request.Host = "blog.csdn.net";
            //request.Cookie = "";
            //request.Timeout = 30;
            //request.Add("Upgrade-Insecure-Requests", "1");
            return request;
        }
    }
}
欲了解更多信息,请点击此处查看原创文章