HtmlAgilityPack爬取美女图片、每日一文文章、小说

HtmlAgilityPack?

HtmlAgilityPack 是 .NET 下的一个 HTML 解析类库,支持用 XPath 来解析 HTML。命名空间:HtmlAgilityPack

爬取美女图片核心代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
/// <summary>
/// Walks the jandan.net "ooxx" listing pages from page 88 down to page 1 and
/// passes every comment image found on them to <c>DownPic</c>, saving each one
/// under a freshly generated GUID file name.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    HtmlWeb web = new HtmlWeb();
    string path = @"F:\pic\";
    for (int i = 88; i >= 1; i--)
    {
        var url = "http://jandan.net/ooxx/page-" + i;
        HtmlDocument doc = web.Load(url);

        // SelectNodes returns null (not an empty collection) when nothing matches,
        // so guard before iterating. The collection is enumerated directly:
        // AsParallel().ToList() did no parallel work and did not guarantee order.
        var nodeList = doc.DocumentNode.SelectNodes("//*[@class=\"commentlist\"]/li");
        if (nodeList == null)
        {
            continue;
        }

        foreach (var item in nodeList)
        {
            // Not every comment contains an image; skip the ones that do not.
            HtmlNode imghtml = item.SelectSingleNode(".//img");
            if (imghtml == null)
            {
                continue;
            }

            string src = imghtml.GetAttributeValue("src", string.Empty);
            if (string.IsNullOrEmpty(src))
            {
                continue;
            }

            // Only protocol-relative URLs ("//host/...") need a scheme prepended;
            // an already absolute URL must be left untouched.
            var imgsrc = src.StartsWith("//", StringComparison.Ordinal) ? "http:" + src : src;

            // Derive the extension from the last path segment (ignoring any query
            // string or fragment) instead of copying the last four characters,
            // which is wrong for ".jpeg"/".webp" and throws on very short strings.
            int queryStart = imgsrc.IndexOfAny(new[] { '?', '#' });
            string urlPath = queryStart >= 0 ? imgsrc.Substring(0, queryStart) : imgsrc;
            int dot = urlPath.LastIndexOf('.');
            int slash = urlPath.LastIndexOf('/');
            string extension = dot > slash ? urlPath.Substring(dot) : string.Empty;

            var imgname = Guid.NewGuid().ToString() + extension;
            Console.WriteLine(imgsrc);
            DownPic(imgsrc, path + imgname);
        }
    }
}

爬取每日一文核心代码

1
2
3
4
5
6
7
8
9
10
11
12
// Downloads a random article from meiriyiwen.com and extracts its title,
// author and body text from the element with id "article_show".
var url = "https://meiriyiwen.com/random/";
HtmlWeb web = new HtmlWeb();
HtmlDocument doc = web.Load(url);
HtmlNode node = doc.DocumentNode.SelectSingleNode("//*[@id=\"article_show\"]");
if (node == null)
{
    // SelectSingleNode returns null when nothing matches; fail with a clear message
    // instead of a NullReferenceException further down.
    throw new System.InvalidOperationException("Element with id \"article_show\" was not found at " + url);
}

// Strip scripts, styles, comments and the share widget so they do not leak into InnerText.
// ToList() snapshots the matches first, so removing nodes does not disturb the enumeration.
node.Descendants()
    .Where(n => n.Name == "script" || n.Name == "style" || n.Name == "#comment" || n.Id == "bdshare")
    .ToList().ForEach(n => n.Remove());

// The leading "." keeps each lookup inside the cleaned article node. An XPath that
// starts with "//" is evaluated from the document root even when called on a child node.
var title = node.SelectSingleNode(".//h1").InnerText;
var author = node.SelectSingleNode(".//*[@class=\"article_author\"]").InnerText;
var article = node.SelectSingleNode(".//*[@class=\"article_text\"]").InnerText.TrimStart();

爬取小说核心代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
/// <summary>
/// Reads the chapter index at <c>/zhetian/</c> on jueshitangmen.info, downloads each
/// linked chapter, prints it to the console and writes it to a text file named
/// after the chapter title under <c>F:\Article\遮天\</c>.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    var domain = "http://www.jueshitangmen.info";
    var outputDir = @"F:\Article\遮天\";
    HtmlWeb web = new HtmlWeb();
    HtmlDocument doc = web.Load(domain + "/zhetian/");

    // SelectNodes returns null when nothing matches. The collection is enumerated
    // directly so chapters are processed in page order; AsParallel().ToList() did
    // no parallel work and did not guarantee ordering.
    var nodeList = doc.DocumentNode.SelectNodes("//*[@class=\"panel\"]/ul/li");
    if (nodeList == null)
    {
        return;
    }

    // No-op when the directory already exists.
    Directory.CreateDirectory(outputDir);

    foreach (var item in nodeList)
    {
        HtmlNode html = item.SelectSingleNode(".//span/a");
        if (html == null)
        {
            continue;
        }

        var title = html.InnerText;
        var href = html.GetAttributeValue("href", string.Empty);
        if (string.IsNullOrEmpty(href))
        {
            continue;
        }

        // Site-relative links ("/path") need the domain prepended; absolute and
        // protocol-relative links are passed through unchanged.
        bool isSiteRelative = href.StartsWith("/", StringComparison.Ordinal)
            && !href.StartsWith("//", StringComparison.Ordinal);
        var url = isSiteRelative ? domain + href : href;

        HtmlDocument document = web.Load(url);
        HtmlNode node = document.DocumentNode.SelectSingleNode("//*[@class=\"content\"]");
        if (node == null)
        {
            // Chapter page without the expected content container; skip it.
            continue;
        }

        // Strip scripts, styles and comments so they do not leak into InnerText.
        // ToList() snapshots the matches before any node is removed.
        node.Descendants()
            .Where(n => n.Name == "script" || n.Name == "style" || n.Name == "#comment")
            .ToList().ForEach(n => n.Remove());
        var content = node.InnerText.Trim();

        Console.WriteLine(title + "\r\n");
        Console.WriteLine(content);

        // Titles may contain characters that are illegal in file names (e.g. '?', ':').
        // Replace them for the file name only; the text written inside keeps the original title.
        var fileName = string.Join("_", title.Split(Path.GetInvalidFileNameChars())) + ".txt";

        // The using block releases the file handle even if a write throws.
        using (StreamWriter stream = new StreamWriter(outputDir + fileName))
        {
            stream.WriteLine(title + "\r\n");
            stream.WriteLine(content);
        }
    }
}

源代码下载

HtmlAgilityPackDemo

×

谢谢你请我吃辣条

扫码支持
扫码打赏,你说多少就多少

打开支付宝扫一扫,即可进行扫码打赏哦

文章目录
  1. 1. HtmlAgilityPack?
  2. 2. 爬取美女图片核心代码
  3. 3. 爬取每日一文核心代码
  4. 4. 爬取小说核心代码
  5. 5. 源代码下载
,