php 抓取网页标题(风流不在谈锋胜,袖手无言味最长--C#)

优采云 发布时间: 2022-02-22 07:14

  php 抓取网页标题(风流不在谈锋胜,袖手无言味最长--C#)

  代码描绘生活 2020-08-20 173次浏览 原C#抓取带有网页参数的img src图片链接并下载

  关键词

  风流不在谈锋胜,袖手无言味最长。本文主要介绍如何用 C# 抓取网页中带参数的 img src 图片链接并下载图片,希望对大家有所帮助。

  

  using System;

using System.Collections.Generic;

using System.ComponentModel;

using System.Data;

using System.Drawing;

using System.IO;

using System.Linq;

using System.Net;

using System.Text;

using System.Text.RegularExpressions;

using System.Threading;

using System.Windows.Forms;

namespace ImageCollection

{

public partial class Form1 : Form

{

// Directory that downloaded images are written to: the application's base
// directory with "img" appended. Because this field is named Path, it hides
// System.IO.Path inside this class, which is why the code below spells out
// System.IO.Path in full.
private static string Path = AppDomain.CurrentDomain.BaseDirectory + "img";

// Creates the form. InitializeComponent() is defined outside this excerpt
// (presumably the designer-generated half of this partial class - confirm).
public Form1()

{

InitializeComponent();

}

/// <summary>
/// Handles the button click: reads the URL typed into <c>txturl</c> and starts
/// <see cref="ShuaQu"/> for it on a background thread.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void btnshuaqu_Click(object sender, EventArgs e)
{
    string url = txturl.Text.Trim();
    if (string.IsNullOrEmpty(url))
    {
        MessageBox.Show("请输入URl");
        return;
    }

    // "\r\n" has to be a real CR/LF escape sequence. With doubled backslashes
    // the text box received the four literal characters \r\n instead of a
    // line break.
    txtimg.AppendText("开始抓取中:\r\n");

    // The fetch runs off the UI thread; IsBackground lets the process exit
    // even if the thread is still running.
    Thread th = new Thread(() => ShuaQu(url)) { IsBackground = true };
    th.Start();
}

/// <summary>
/// Fetches the page at <paramref name="url"/>, shows its title and the image
/// URLs found in it, and starts one download thread per image that is hosted
/// on www.xxx.com.
/// </summary>
/// <remarks>
/// The output directory is deleted and recreated on every call. All UI
/// updates go through <c>Control.Invoke</c>, so this method can run on a
/// non-UI thread.
/// </remarks>
/// <param name="url">Address of the page to scan.</param>
private void ShuaQu(string url)
{
    // Start every run from an empty output directory.
    if (System.IO.Directory.Exists(Path))
    {
        System.IO.Directory.Delete(Path, true);
    }

    System.IO.Directory.CreateDirectory(Path);

    // NOTE(review): WebHttp is defined outside this excerpt; the meaning of the
    // (null, 3) arguments cannot be verified from here. A null result is
    // treated as an empty page so the regex calls below cannot throw.
    string result = WebHttp.HttpGet(url, null, 3) ?? string.Empty;

    // The method is declared as GetHtmlImageUrlList (capital H); the previous
    // spelling "GethtmlImageUrlList" did not resolve to anything.
    string[] imageUrls = GetHtmlImageUrlList(result);

    txtimg.Invoke(new Action(() =>
    {
        txtimg.AppendText("已经获取到数据!" + imageUrls.Length + "\r\n");
    }));

    // Take only the text between <title> and </title>. Singleline lets the
    // title span several lines; IgnoreCase accepts <TITLE>.
    Match titleMatch = Regex.Match(
        result,
        @"<title[^>]*>(.*?)</title>",
        RegexOptions.IgnoreCase | RegexOptions.Singleline);
    string title = titleMatch.Success ? titleMatch.Groups[1].Value.Trim() : string.Empty;

    txttitle.Invoke(new Action(() =>
    {
        // Strip quotes and backslashes from the displayed title.
        txttitle.Text = Regex.Replace(title, @"[\\""]+", "");
    }));

    foreach (string s in imageUrls)
    {
        // img src values are frequently empty or relative. new Uri(s) throws
        // UriFormatException for those, which would terminate this thread
        // (and, being unhandled, the process). Skip them instead.
        Uri imageUri;
        if (!Uri.TryCreate(s, UriKind.Absolute, out imageUri) || imageUri.Host != "www.xxx.com")
        {
            continue;
        }

        // Per-iteration copy so each lambda captures its own URL regardless of
        // how the compiler scopes the foreach variable.
        string imageUrl = s;

        Thread downimg = new Thread(() => Get_img(imageUrl)) { IsBackground = true };
        downimg.Start();

        txtimg.Invoke(new Action(() =>
        {
            txtimg.AppendText(imageUrl + "\r\n");
        }));
    }

    // Reported once every download thread has been started; the downloads
    // themselves may still be in progress.
    txtimg.Invoke(new Action(() =>
    {
        txtimg.AppendText("全部抓取完成!\r\n");
    }));
}

/// <summary>
/// Downloads the image at <paramref name="imgpath"/> into the output
/// directory. The local file name is the last path segment of the URL with
/// any query string removed.
/// </summary>
/// <param name="imgpath">Image URL; may carry a query string.</param>
public void Get_img(string imgpath)
{
    if (string.IsNullOrEmpty(imgpath))
    {
        return;
    }

    // Everything after '?' is request parameters, not part of the file name.
    string urlWithoutQuery = imgpath.Split('?')[0];
    string name = System.IO.Path.GetFileName(urlWithoutQuery);

    // A URL ending in '/' yields no file name; there is nothing to save to.
    if (string.IsNullOrEmpty(name))
    {
        return;
    }

    // WebClient is IDisposable; release it as soon as the download finishes.
    using (WebClient mywebclient = new WebClient())
    {
        // The full URL (query string included) is still what gets requested.
        mywebclient.DownloadFile(imgpath, System.IO.Path.Combine(Path, name));
    }
}

// Matches an <img ...> tag and captures the value of its src attribute in the
// named group "imgUrl". The value may be double-quoted, single-quoted or bare.
private static readonly Regex ImgTagRegex = new Regex(
    @"<img\b[^<>]*?\bsrc\s*=\s*[""']?\s*(?<imgUrl>[^\s""'<>]*)[^<>]*?/?\s*>",
    RegexOptions.IgnoreCase | RegexOptions.Compiled);

/// <summary>
/// Returns the src URL of every img tag found in the given HTML.
/// </summary>
/// <param name="sHtmlText">HTML source to scan.</param>
/// <returns>
/// The src values in document order, exactly as written in the markup (they
/// may be relative or empty). An empty array when the input is null or empty
/// or contains no img tags.
/// </returns>
private string[] GetHtmlImageUrlList(string sHtmlText)
{
    if (string.IsNullOrEmpty(sHtmlText))
    {
        return new string[0];
    }

    // An empty pattern would match at every position and define no "imgUrl"
    // group, producing only empty strings; the real pattern lives in
    // ImgTagRegex above.
    return ImgTagRegex.Matches(sHtmlText)
        .Cast<Match>()
        .Select(match => match.Groups["imgUrl"].Value)
        .ToArray();
}

}

}

  #region 下载图片到Image

/// <summary>
/// Downloads the resource at <paramref name="url"/> and decodes it as an image.
/// </summary>
/// <param name="url">Address of the image to download.</param>
/// <returns>
/// A <see cref="Bitmap"/> holding its own copy of the pixels. The caller owns
/// it and should dispose it when done.
/// </returns>
public static Image UrlToImage(string url)
{
    byte[] bytes;
    using (WebClient mywebclient = new WebClient())
    {
        bytes = mywebclient.DownloadData(url);
    }

    using (MemoryStream ms = new MemoryStream(bytes))
    using (Image decoded = Image.FromStream(ms))
    {
        // An Image created by FromStream needs its stream to stay open for the
        // image's whole lifetime. Returning it from inside a using block over
        // that stream hands the caller an image backed by a disposed stream.
        // Copying into a new Bitmap detaches the result from the stream.
        // Trade-off: the copy holds only the current frame, so multi-frame
        // images (e.g. animated GIFs) are flattened.
        return new Bitmap(decoded);
    }
}

#endregion

  以上就是用 C# 抓取网页中带参数的 img src 图片链接并下载图片的全部内容。如果未能解决您的问题,请参考以下文章:

  相关文章

  C#将图片保存为Base64格式

  jquery写的,把网站中的所有图片src变成URL格式的demo

  当我用cheerio抓取img src时,我得到一个巨大的字符串,而不仅仅是链接

  php远程下载图片

  网页图片热点链接及坐标值

  beautifulsoup 库简单地抓取网页——获取所有链接示例

  从网页中抓取图像

  html 基础知识(imga 列表)

0 个评论

要回复文章请先登录注册


官方客服QQ群

微信人工客服

QQ人工客服


线