当前位置: 代码迷 >> C# >> 爬虫怎么抓取Post请求数据
  详细解决方案

爬虫怎么抓取Post请求数据

热度:132   发布时间:2016-05-05 04:31:36.0
爬虫如何抓取Post请求数据
用 C# 编写的爬虫程序,如何抓取需要通过 POST 请求才能获取的数据?
数据源:http://msdn.itellyou.cn
------解决思路----------------------


 public class HttpProc
    {
        private bool _allowAutoRedirect;
        private CookieCollection _cookieGet;
        private CookieCollection _cookiePost;
        private WebHeaderCollection _customHeaders;
        private Encoding _encoding;
        private Version _httpVersion;
        private IWebProxy _Proxy;
        private string _ResHtml;
        private string _strAcceptType;
        private string _strCode;
        private string _strErr;
        private string _strPostdata;
        private string _strRefUrl;
        private string _strStatusDes;
        private string _strUrl;
        private string _strUserAgent;
        private bool _succeed;
        private int _timeOut;

       
        public HttpProc(string strURL, string PostData)
        {
            this._encoding = Encoding.UTF8;
            this._strAcceptType = "*/*";
            this._strUserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; .NET CLR 1.1.4322)";
            this._allowAutoRedirect = true;
            this._httpVersion = HttpVersion.Version11;
            this._customHeaders = new WebHeaderCollection();
            this._strUrl = strURL;
            this._strPostdata = PostData;
        }



        private HttpWebRequest CreateRequest()
        {
            this._strUrl = Formatter.CnUrlEncode(this.strUrl);

            HttpWebRequest request = null;
            request = (HttpWebRequest)WebRequest.Create(this._strUrl);
            request.Headers = this.customHeaders;
            request.Headers.Add("Accept-Encoding: gzip, deflate");
            request.CookieContainer = new CookieContainer();
            request.Referer = this.strRefUrl ?? this.strUrl;
            request.Accept = this.strAcceptType;
            request.ProtocolVersion = this.httpVersion;
            request.UserAgent = this.strUserAgent;
            request.AllowAutoRedirect = this.allowAutoRedirect;
            if (_timeOut > 0)
            {
                request.Timeout = _timeOut;
                request.ReadWriteTimeout = _timeOut;
            }
            if (this._cookiePost != null)
            {
                Uri uri = new Uri(this._strUrl);
                foreach (Cookie cookie in this._cookiePost)
                {
                    cookie.Domain = uri.Host;
                }
                request.CookieContainer.Add(this._cookiePost);
            }
            if ((this._strPostdata != null) && (this._strPostdata.Length > 0))
            {
                request.ContentType = "application/x-www-form-urlencoded";
                request.Method = "POST";
                byte[] bytes = this._encoding.GetBytes(this._strPostdata);
                request.ContentLength = bytes.Length;
                Stream requestStream = null;
                try
                {
                    requestStream = request.GetRequestStream();
                    requestStream.Write(bytes, 0, bytes.Length);
                }
                catch (Exception exception)
                {
                    this._strErr = exception.Message;
                }
                finally
                {
                    if (requestStream != null)
                    {
                        requestStream.Close();
                    }
                }
            }
            return request;
        }


        public string Proc()
        {
            HttpWebRequest request = this.CreateRequest();
            HttpWebResponse response = null;
            StreamReader reader = null;
            try
            {
                response = (HttpWebResponse)request.GetResponse();


                if ((response.Headers["Content-Encoding"] != null) && (response.Headers["Content-Encoding"].ToString().IndexOf("gzip") > -1))
                {

                    GZipStream gstream = new GZipStream(response.GetResponseStream(), CompressionMode.Decompress);
                    reader = new StreamReader(gstream, encoding);

                }
                else
                {
                    reader = new StreamReader(response.GetResponseStream(), this.encoding);
                }
               
                this._ResHtml = reader.ReadToEnd();
            }
            catch (Exception exception)
            {
                this._strErr = exception.Message;
                return "";
            }
            finally
            {
                if (reader != null)
                {
                    reader.Close();
                }
            }
            this._strCode = response.StatusCode.ToString();
            this._strStatusDes = response.StatusDescription.ToString();
            if (this._strCode == "302")
            {
                this._ResHtml = response.Headers["location"];
            }
            if (response.Cookies.Count > 0)
            {
                this._cookieGet = response.Cookies;
            }
            return this.ResHtml;
        }




        public static CookieCollection CookiesStrToCollection(string strCookies)
        {


            if (strCookies == "")
                return null;

            CookieCollection cookies = new CookieCollection();
            string[] textArray = strCookies.Trim().Split(';');
            foreach (string text in textArray)
            {
                int length = text.IndexOf('=');
                if (length > 0)
                {
                    cookies.Add(new Cookie(text.Substring(0, length).Trim(), text.Substring(length + 1, (text.Length - length) - 1)));
                }
            }


            return cookies;
        }


        public int Timeout
        {
            get {
                return _timeOut;
            }
            set {
                _timeOut = value;
            }
        }

        public bool allowAutoRedirect
        {
            get
            {
                return this._allowAutoRedirect;
            }
            set
            {
                this._allowAutoRedirect = value;
            }
        }

        public CookieCollection cookieGet
        {
            get
            {
                return this._cookieGet;
            }
            set
            {
                this._cookieGet = value;
            }
        }

        public CookieCollection cookiePost
        {
            get
            {
                return this._cookiePost;
            }
            set
            {
                this._cookiePost = value;
            }
        }

        public WebHeaderCollection customHeaders
        {
            get
            {
                return this._customHeaders;
            }
            set
            {
                this._customHeaders = value;
            }
        }

        public Encoding encoding
        {
            get
            {
                return this._encoding;
            }
            set
            {
                this._encoding = value;
            }
        }

        public Version httpVersion
        {
            get
            {
                return this._httpVersion;
            }
            set
            {
                this._httpVersion = value;
            }
        }

        public IWebProxy Proxy
        {
            get
            {
                return this._Proxy;
            }
            set
            {
                this._Proxy = value;
            }
        }

        public string ResHtml
        {
            get
            {
                return this._ResHtml;
            }
        }

        public string strAcceptType
        {
            get
            {
                return this._strAcceptType;
            }
            set
            {
                this._strAcceptType = value;
            }
        }

        public string strCode
        {
            get
            {
                return this._strCode;
            }
            set
            {
                this._strCode = value;
            }
        }

        public string strErr
        {
            get
            {
                return this._strErr;
            }
            set
            {
                this._strErr = value;
            }
        }

        public string strPostdata
        {
            get
            {
                return this._strPostdata;
            }
            set
            {
                this._strPostdata = value;
            }
        }

        public string strRefUrl
        {
            get
            {
                return this._strRefUrl;
            }
            set
            {
                this._strRefUrl = value;
            }
        }

        public string strStatusDes
        {
            get
            {
                return this._strStatusDes;
            }
            set
            {
                this._strStatusDes = value;
            }
        }

        public string strUrl
        {
            get
            {
                return this._strUrl;
            }
            set
            {
                this._strUrl = value;
            }
        }

        public string strUserAgent
        {
            get
            {
                return this._strUserAgent;
            }
            set
            {
                this._strUserAgent = value;
            }
        }

        public bool succeed
        {
            get
            {
                return this._succeed;
            }
            set
            {
                this._succeed = value;
            }
        }
    }
}


------解决思路----------------------
仅有表面,没有内容的网站,任怎样做广告都卖不出去啊。
  相关解决方案