网页源代码抓取工具(Excel教程Excel函数Excel表格制作Excel2010Excel实用技巧Excel视频教程)

优采云 发布时间: 2021-10-23 03:10

  网页源代码抓取工具(Excel教程Excel函数Excel表格制作Excel2010Excel实用技巧Excel视频教程)

  方法一(推荐)

  

/// <summary>
/// Downloads the source of a web page with <see cref="HttpWebRequest"/>.
/// </summary>
/// <remarks>
/// The response body is decoded with <see cref="Encoding.Default"/>. When the stream
/// starts with a byte-order mark, <see cref="StreamReader"/> switches to the encoding
/// the mark indicates, so pages served with a BOM decode correctly. Pages without a
/// BOM in an encoding other than the default are not detected.
/// </remarks>
/// <param name="url">Address of the page to fetch; also sent as the Referer header.</param>
/// <returns>The page source as text.</returns>
/// <exception cref="WebException">The request failed or timed out.</exception>
public static string gethtmlsource2(string url)
{
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
    request.Accept = "*/*"; // accept any content type
    request.UserAgent = "mozilla/4.0 (compatible; msie 6.0; windows nt 5.2; .net clr 1.1.4322)"; // present the request as coming from IE
    request.AllowAutoRedirect = true; // follow redirects (e.g. 302)
    // Assign request.CookieContainer = new CookieContainer() here if the site requires cookies.
    request.Referer = url; // report the page itself as the referrer

    // Dispose the response, stream and reader even when reading throws, so the
    // underlying connection is always released.
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    using (Stream stream = response.GetResponseStream())
    using (StreamReader reader = new StreamReader(stream, Encoding.Default))
    {
        return reader.ReadToEnd();
    }
}

  方法二

  

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Web;

namespace mysql
{
    /// <summary>
    /// Helper for downloading the body of a URL as text.
    /// </summary>
    public class gethttpdata
    {
        /// <summary>
        /// Requests <paramref name="url"/> and returns the response body decoded as UTF-8.
        /// </summary>
        /// <param name="url">Address to request.</param>
        /// <returns>
        /// The response body, or <c>null</c> when obtaining the response fails
        /// (for example on a network error or after the 50-second timeout).
        /// </returns>
        /// <remarks>
        /// Failures while creating the request or while reading the body are not
        /// caught and propagate to the caller.
        /// </remarks>
        public static string gethttpdata2(string url)
        {
            WebRequest request = WebRequest.Create(url);
            request.Timeout = 50000; // milliseconds

            WebResponse response;
            try
            {
                response = request.GetResponse();
            }
            catch (Exception)
            {
                // Best effort by contract: callers receive null when the request fails.
                return null;
            }

            if (response == null)
            {
                return null;
            }

            // Dispose the response and reader even if reading the body throws.
            using (response)
            using (StreamReader reader = new StreamReader(response.GetResponseStream(), Encoding.GetEncoding("utf-8")))
            {
                return reader.ReadToEnd();
            }
        }
    }
}

  方法三

<p>

public static string gethtml(string url, params string [] charsets)//url是要访问的网站地址,charset是目标网页的编码,如果传入的是null或者"",那就自动分析网页的编码

{

try

{

string charset = null;

if (charsets.length == 1) {

charset = charsets[0];

}

webclient mywebclient = new webclient(); //创建webclient实例mywebclient

// 需要注意的:

//有的网页可能下不下来,有种种原因比如需要cookie,编码问题等等

//这是就要具体问题具体分析比如在头部加入cookie

// webclient.headers.add("cookie", cookie);

//这样可能需要一些重载方法。根据需要写就可以了

//获取或设置用于对向 internet 资源的请求进行身份验证的网络凭据。

mywebclient.credentials = credentialcache.defaultcredentials;

//如果服务器要验证用户名,密码

//networkcredential mycred = new networkcredential(struser, strpassword);

//mywebclient.credentials = mycred;

//从资源下载数据并返回字节数组。(加@是因为网址中间有"/"符号)

byte[] mydatabuffer = mywebclient.downloaddata(url);

string strwebdata = encoding.default.getstring(mydatabuffer);

//获取网页字符编码描述信息

match charsetmatch = regex.match(strwebdata, "

0 个评论

要回复文章请先登录注册


官方客服QQ群

微信人工客服

QQ人工客服


线