using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;

namespace _2015._5._23通过WebClient类发起请求并下载html
{
    /// <summary>
    /// Console demo that downloads a page's HTML with <see cref="WebClient"/> and
    /// extracts pieces of it with regular expressions.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Downloads the HTML of http://www.lagou.com/, prints the inner text of every
        /// anchor whose opening tag contains "jobs", prints the number of matches, and
        /// then waits for a key press before exiting.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            #region Scrape e-mail addresses from a web page
            //string url = "http://zhidao.baidu.com/link?url=cvF0de2o9gkmk3zW2jY23TLEUs6wX-79E1DQVZG7qaBhEVT_xlh6TO7p0W4qwuAZ_InLymC_-mJBBcpdbzTeq_";
            //WebClient wc = new WebClient();
            //wc.Encoding = Encoding.UTF8;
            //string str = wc.DownloadString(url);
            //MatchCollection matchs= Regex.Matches(str,@"\w+@([-\w])+([\.\w])+",RegexOptions.ECMAScript);
            //foreach (Match item in matchs)
            //{
            //    Console.WriteLine(item.Value);
            //}
            //Console.WriteLine(matchs.Count);
            #endregion

            #region Scrape images from a web page
            //WebClient wc = new WebClient();
            //wc.Encoding = Encoding.UTF8;
            ////Download the page's HTML source
            //string html = wc.DownloadString("http://dongxi.douban.com/?dcs=top-nav&dcm=douban");
            //MatchCollection matches= Regex.Matches(html,"<img.*src=\"(.+?)\".*>");
            //foreach (Match item in matches)
            //{
            //    //Download each image to the given path
            //    wc.DownloadFile(item.Groups[1].Value,@"c:\mv\"+Path.GetFileName(item.Groups[1].Value));
            //}
            //Console.WriteLine(matches.Count);
            #endregion

            // Scrape some information: the text of the job links on the landing page.
            string html;

            // WebClient is IDisposable; the using statement releases it even if the
            // download throws.
            using (WebClient wc = new WebClient())
            {
                wc.Encoding = Encoding.UTF8;
                html = wc.DownloadString("http://www.lagou.com/");
            }

            // [^>]* keeps "jobs" inside the opening <a ...> tag and the lazy (.*?) stops
            // at the first </a>. The previous greedy pattern "<a.*jobs.*>(.*)</a>"
            // collapsed every anchor on one line into a single match and captured only
            // the last anchor's text.
            MatchCollection matches = Regex.Matches(html, "<a[^>]*jobs[^>]*>(.*?)</a>");

            foreach (Match item in matches)
            {
                // Group 1 is the content between the opening tag and </a>.
                Console.WriteLine(item.Groups[1].Value);
            }

            Console.WriteLine(matches.Count);

            // Keep the console window open until a key is pressed.
            Console.ReadKey();
        }
    }
}
详细解决方案
C#实现简略的网页爬虫
热度:6 发布时间:2016-05-05 03:41:27.0
相关解决方案