当前位置: 代码迷 >> C# >> C#实现简单的网页爬虫
  详细解决方案

C#实现简单的网页爬虫

热度:6   发布时间:2016-05-05 03:41:27.0
C#实现简单的网页爬虫
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;

namespace _2015._5._23通过WebClient类发起请求并下载html
{
    /// <summary>
    /// Minimal web-scraping demo: downloads a page's HTML with <see cref="WebClient"/>
    /// and extracts pieces of it with regular expressions.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Downloads the lagou.com home page, prints the inner text of every anchor
        /// whose opening tag contains "jobs", prints the number of matches, and then
        /// waits for a key press.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            #region Example 1 (disabled): scrape e-mail addresses from a page
            //string url = "http://zhidao.baidu.com/link?url=cvF0de2o9gkmk3zW2jY23TLEUs6wX-79E1DQVZG7qaBhEVT_xlh6TO7p0W4qwuAZ_InLymC_-mJBBcpdbzTeq_";
            //WebClient wc = new WebClient();
            //wc.Encoding = Encoding.UTF8;
            //string str = wc.DownloadString(url);
            //MatchCollection matchs=  Regex.Matches(str,@"\w+@([-\w])+([\.\w])+",RegexOptions.ECMAScript);
            //foreach (Match item in matchs)
            //{
            //    Console.WriteLine(item.Value);
            //}
            //Console.WriteLine(matchs.Count);
            #endregion

            #region Example 2 (disabled): scrape images from a page
            //WebClient wc = new WebClient();
            //wc.Encoding = Encoding.UTF8;
            //// Download the page's HTML source.
            //string html = wc.DownloadString("http://dongxi.douban.com/?dcs=top-nav&dcm=douban");
            //MatchCollection matches= Regex.Matches(html,"<img.*src=\"(.+?)\".*>");
            //foreach (Match item in matches)
            //{
            //    // Download the image into the target directory.
            //    wc.DownloadFile(item.Groups[1].Value,@"c:\mv\"+Path.GetFileName(item.Groups[1].Value));
            //}
            //Console.WriteLine(matches.Count);
            #endregion

            // Example 3 (active): scrape some information (job links) from a page.
            string html;

            // WebClient is IDisposable; release it as soon as the download finishes.
            using (WebClient wc = new WebClient())
            {
                wc.Encoding = Encoding.UTF8;
                html = wc.DownloadString("http://www.lagou.com/");
            }

            // [^>]* keeps each part of the pattern inside a single opening tag and the
            // lazy (.*?) stops at the first closing </a>. The previous greedy form
            // "<a.*jobs.*>(.*)</a>" collapsed all anchors on one line into a single
            // match and captured only the last anchor's text. \b prevents matching
            // other tags that merely start with "<a" (e.g. <abbr>).
            MatchCollection matches = Regex.Matches(html, @"<a\b[^>]*jobs[^>]*>(.*?)</a>");
            foreach (Match item in matches)
            {
                // Group 1 is the content between the opening and closing anchor tags.
                Console.WriteLine(item.Groups[1].Value);
            }
            Console.WriteLine(matches.Count);

            // Wait for a key press before the program exits.
            Console.ReadKey();
        }
    }
}

  相关解决方案