当前位置: 萬仟网 > IT编程>开发语言>.net > 利用HtmlAgilityPack插件写的一个抓取指定网页的图片 第一次写 很乱 随便看看就行

利用 HtmlAgilityPack 写的一个抓取指定网页图片的小程序(第一次写,比较乱,随便看看就行)

2019年07月12日  | 萬仟网IT编程  | 我要评论
public partial class Form1 : Form { /// <summary> /// 存放图片地址 /// </summary> List<string> ImgList = new List<string>(); /// <summary> /// 当前下载文件 /// </ ...


/// <summary>
/// Main window of a small image scraper: it collects the <c>.jpg</c> image URLs found on a
/// range of gallery pages and downloads them into an <c>img</c> folder under the current
/// working directory.
/// </summary>
/// <remarks>
/// NOTE(review): the published listing had every identifier lower-cased, which cannot compile
/// because C# is case-sensitive. Framework and library names are restored to their real casing.
/// Control names (textBox1, comboBoxEdit1, pageMin, ...) and event-handler names follow the
/// WinForms designer defaults - confirm them against Form1.Designer.cs.
/// </remarks>
public partial class Form1 : Form
{
    /// <summary>
    /// Image URLs collected by the most recent call to <see cref="GetImgs"/>.
    /// </summary>
    List<string> ImgList = new List<string>();

    /// <summary>
    /// Number of downloads of the current batch that have finished (successfully or not).
    /// </summary>
    int _loadFile = 0;

    // Title text taken from the scraped page; used as the file-name prefix when downloading.
    string title = "";

    /// <summary>
    /// Number of files in the current download batch.
    /// </summary>
    int _totalFile = 0;

    // Known image extensions. Not referenced by the code in this part of the class; the
    // scraper currently keeps only ".jpg" images.
    string[] exts = {
        ".bmp", ".dib", ".jpg", ".jpeg",
        ".jpe", ".jfif", ".png", ".gif",
        ".tif", ".tiff" };

    public Form1()
    {
        InitializeComponent();

        // Cross-thread checks are deliberately left enabled: every UI update that may come
        // from a download callback goes through RunOnUi.
    }

    private void Form1_Load(object sender, EventArgs e)
    {
        // Encodings the user can choose for decoding the scraped pages.
        this.comboBoxEdit1.Properties.Items.Add("utf-8");
        this.comboBoxEdit1.Properties.Items.Add("gb2312");
    }

    /// <summary>
    /// Scrapes the configured pages and lists the images found.
    /// </summary>
    /// <param name="sender">Event source (unused).</param>
    /// <param name="e">Event data (unused).</param>
    private void button1_Click(object sender, EventArgs e)
    {
        GetImgs();
    }

    /// <summary>
    /// Starts one asynchronous download per collected image URL.
    /// </summary>
    /// <param name="sender">Event source (unused).</param>
    /// <param name="e">Event data (unused).</param>
    private void button2_Click(object sender, EventArgs e)
    {
        this.textBox1.Clear();
        if (ImgList.Count <= 0) return;

        // Reset the batch counters here so the progress math never uses a stale total.
        _loadFile = 0;
        _totalFile = ImgList.Count;

        string targetDir;
        try
        {
            targetDir = Path.Combine(System.Environment.CurrentDirectory, "img");
            // CreateDirectory does nothing when the folder already exists.
            Directory.CreateDirectory(targetDir);
        }
        catch (Exception ex)
        {
            MessageBox.Show(ex.Message);
            return;
        }

        string prefix = SanitizeFileName(title);
        int index = 1;

        // DownloadFileAsync returns immediately, so the loop runs on the UI thread. That keeps
        // exceptions catchable here (they were lost inside a fire-and-forget task before).
        foreach (string item in ImgList)
        {
            try
            {
                StartDownload(item, targetDir, prefix, index);
            }
            catch (Exception ex)
            {
                // A single bad URL must not abort the batch; count it as finished so the
                // overall progress can still reach 100%.
                OnFileFinished(ex.Message);
            }
            index++;
        }
    }

    /// <summary>
    /// Creates a <see cref="WebClient"/> for one image and starts downloading it.
    /// </summary>
    /// <param name="imageUrl">Absolute URL of the image.</param>
    /// <param name="targetDir">Folder the file is written to.</param>
    /// <param name="prefix">Sanitized title, or an empty string when no title is known.</param>
    /// <param name="index">1-based position of the image in the batch.</param>
    private void StartDownload(string imageUrl, string targetDir, string prefix, int index)
    {
        Uri uri = new Uri(imageUrl);

        // Keep at most ".xxx" as the original did, but without throwing on short input.
        string extension = GetUrlExtension(imageUrl);
        if (extension.Length > 4) extension = extension.Substring(0, 4);
        if (extension.Length == 0) extension = ".jpg";

        // The extension is appended in both cases; the original ternary dropped it for GUID names.
        string stem = prefix.Length == 0 ? Guid.NewGuid().ToString() : prefix + "_" + index;
        string path = Path.Combine(targetDir, stem + extension);

        WebClient webClient = new WebClient();
        try
        {
            webClient.Proxy = null;
            webClient.DownloadProgressChanged += webClient_DownloadProgressChanged;
            webClient.DownloadFileCompleted += webClient_DownloadFileCompleted;
            webClient.DownloadFileAsync(uri, path);
        }
        catch
        {
            webClient.Dispose();
            throw;
        }
    }

    /// <summary>
    /// Shows the progress of the download that raised the event.
    /// </summary>
    /// <param name="sender">The reporting <see cref="WebClient"/>.</param>
    /// <param name="e">Progress data.</param>
    private void webClient_DownloadProgressChanged(object sender, DownloadProgressChangedEventArgs e)
    {
        RunOnUi(() =>
        {
            this.progressBar2.Value = e.ProgressPercentage;
            this.label2.Text = string.Format("正在下载文件,完成进度{0}% {1}/{2}(字节)"
                , e.ProgressPercentage
                , e.BytesReceived
                , e.TotalBytesToReceive);
        });
    }

    /// <summary>
    /// Collects image URLs from the page entered in textBox2 and from its follow-up pages
    /// (pageMin..pageMax), then lists them in textBox1.
    /// </summary>
    public void GetImgs()
    {
        this.textBox1.Clear();
        this.progressBar1.Value = 0;
        this.progressBar2.Value = 0;
        this.label2.Text = "单个文件进度:";
        this.label1.Text = "总进度:";
        ImgList.Clear();

        string startUrl = this.textBox2.Text.Trim();
        // Text is the value shown in the editor; SelectedText is only the highlighted part
        // and is empty unless the user has selected characters.
        string encodingName = (this.comboBoxEdit1.Text ?? "").Trim();
        if (startUrl.Length <= 0 || encodingName.Length <= 0)
        {
            return;
        }

        try
        {
            HtmlWeb htmlWeb = new HtmlWeb();
            htmlWeb.OverrideEncoding = Encoding.GetEncoding(encodingName);

            int pageMinIndex = Convert.ToInt32(pageMin.Value);
            int pageMaxIndex = Convert.ToInt32(pageMax.Value);
            this.textBox1.AppendText("抓取到的图片地址\r\n");

            for (int i = pageMinIndex; i <= pageMaxIndex; i++)
            {
                string url = BuildPageUrl(startUrl, i);
                HtmlAgilityPack.HtmlDocument htmlDocument = htmlWeb.Load(url);
                HtmlNode root = htmlDocument.DocumentNode;

                HtmlNodeCollection nodes;
                if (url.StartsWith("http://www.mmonly.cc", StringComparison.OrdinalIgnoreCase))
                {
                    // The original code drops the last five characters of the <h1> text.
                    string heading = root.SelectSingleNode("//h1")?.InnerText ?? "";
                    title = heading.Length >= 5 ? heading.Substring(0, heading.Length - 5) : heading;
                    nodes = root.SelectNodes("//div[@id='big-pic']//img");
                }
                else
                {
                    // Used for https://www.mntup.com and every other site.
                    title = root.SelectSingleNode("//div[@class='title']")?.InnerText ?? "";
                    nodes = root.SelectNodes("//img");
                }

                if (nodes == null || nodes.Count <= 0)
                {
                    MessageBox.Show($@"当前页{i}未找到图片,或没有第{i}页");
                    ImgList.Clear();
                    textBox1.Clear();
                    return;
                }

                Uri pageUri = new Uri(url);
                foreach (HtmlNode item in nodes)
                {
                    string src = item.Attributes["src"]?.Value;
                    if (string.IsNullOrEmpty(src)) continue;
                    if (!GetUrlExtension(src).StartsWith(".jpg", StringComparison.OrdinalIgnoreCase))
                    {
                        continue;
                    }

                    string imgUrl;
                    if (src.StartsWith("http", StringComparison.OrdinalIgnoreCase))
                    {
                        imgUrl = src;
                    }
                    else
                    {
                        // Resolve against the page URL instead of assuming a 21-character origin.
                        Uri resolved;
                        if (!Uri.TryCreate(pageUri, src, out resolved)) continue;
                        imgUrl = resolved.AbsoluteUri;
                    }

                    this.textBox1.AppendText(imgUrl + "\r\n");
                    this.ImgList.Add(imgUrl);
                }
            }

            this._totalFile = ImgList.Count;
            this.textBox1.AppendText("总共获取图片" + ImgList.Count);
        }
        catch (Exception ex)
        {
            MessageBox.Show(ex.Message);
            return;
        }
    }

    /// <summary>
    /// Releases the finished <see cref="WebClient"/> and updates the overall progress.
    /// </summary>
    /// <param name="sender">The <see cref="WebClient"/> whose download ended.</param>
    /// <param name="e">Completion data; <c>Error</c> is set when the download failed.</param>
    private void webClient_DownloadFileCompleted(object sender, AsyncCompletedEventArgs e)
    {
        WebClient client = sender as WebClient;
        if (client != null)
        {
            client.DownloadProgressChanged -= webClient_DownloadProgressChanged;
            client.DownloadFileCompleted -= webClient_DownloadFileCompleted;
            client.Dispose();
        }

        string failure = e.Error != null ? e.Error.Message : null;
        RunOnUi(() => OnFileFinished(failure));
    }

    /// <summary>
    /// Counts one finished file and refreshes the overall progress display.
    /// Must run on the UI thread.
    /// </summary>
    /// <param name="error">Failure message, or <c>null</c> when the file was downloaded.</param>
    private void OnFileFinished(string error)
    {
        _loadFile++;

        // Guard against a zero total and clamp so ProgressBar.Value never leaves 0..100.
        int percent = _totalFile > 0
            ? Math.Min(100, (int)(100.0 * _loadFile / _totalFile))
            : 100;

        this.progressBar1.Value = percent;
        this.label1.Text = string.Format("已完成文件下载{0}% {1}/{2}(文件个数)"
            , percent
            , _loadFile
            , _totalFile);

        if (error == null)
        {
            this.textBox1.AppendText($"正在下载第{_loadFile}张......\r\n");
        }
        else
        {
            this.textBox1.AppendText($"第{_loadFile}张下载失败: {error}\r\n");
        }

        if (_loadFile == _totalFile)
        {
            this.textBox1.AppendText("下载完毕");
        }
    }

    /// <summary>
    /// Runs <paramref name="action"/> on the UI thread; does nothing once the form is gone.
    /// </summary>
    /// <param name="action">Code that touches controls.</param>
    private void RunOnUi(Action action)
    {
        if (this.IsDisposed || !this.IsHandleCreated) return;

        if (this.InvokeRequired)
        {
            this.Invoke(action);
        }
        else
        {
            action();
        }
    }

    /// <summary>
    /// Returns the URL of page <paramref name="page"/>: the first page is the URL itself,
    /// later pages replace the text after the last '.' with <c>_{page}.html</c>.
    /// </summary>
    /// <param name="firstPageUrl">URL of the first page.</param>
    /// <param name="page">Page number.</param>
    /// <returns>The URL to load for that page.</returns>
    private static string BuildPageUrl(string firstPageUrl, int page)
    {
        if (page < 2) return firstPageUrl;

        int dot = firstPageUrl.LastIndexOf('.');
        string stem = dot >= 0 ? firstPageUrl.Substring(0, dot) : firstPageUrl;
        return stem + "_" + page + ".html";
    }

    /// <summary>
    /// Extracts the extension (including the dot) of the last path segment of a URL,
    /// ignoring any query string or fragment.
    /// </summary>
    /// <param name="url">Absolute or relative URL; may be null.</param>
    /// <returns>The extension, or an empty string when there is none.</returns>
    private static string GetUrlExtension(string url)
    {
        if (string.IsNullOrEmpty(url)) return "";

        int cut = url.IndexOfAny(new[] { '?', '#' });
        string path = cut >= 0 ? url.Substring(0, cut) : url;

        int dot = path.LastIndexOf('.');
        int slash = path.LastIndexOf('/');
        return dot > slash ? path.Substring(dot) : "";
    }

    /// <summary>
    /// Makes a page title usable as a file name by trimming it and replacing every
    /// character that is not allowed in file names with '_'.
    /// </summary>
    /// <param name="name">Raw title; may be null.</param>
    /// <returns>The sanitized name, or an empty string when nothing usable remains.</returns>
    private static string SanitizeFileName(string name)
    {
        if (string.IsNullOrWhiteSpace(name)) return "";

        return string.Join("_", name.Trim().Split(Path.GetInvalidFileNameChars()));
    }
}

如您对本文有疑问或者有任何想说的,请点击进行留言回复,万千网友为您解惑!

相关文章:

验证码:
Copyright © 2017-2021  萬仟网 保留所有权利. 粤ICP备17035492号-1
站长QQ:2386932994 | 联系邮箱:2386932994@qq.com