C#爬虫程序如何抓取POST请求的数据?
数据源:http://msdn.itellyou.cn
------解决思路----------------------
/// <summary>
/// Issues a single HTTP request with <see cref="HttpWebRequest"/> and returns the
/// response body as text. The request is a POST when <see cref="strPostdata"/> is
/// non-empty; otherwise the request method is left at its default.
/// Failures are not thrown from <see cref="Proc"/>: the message is stored in
/// <see cref="strErr"/>, <see cref="succeed"/> is false and an empty string is returned.
/// </summary>
public class HttpProc
{
    private bool _allowAutoRedirect;
    private CookieCollection _cookieGet;
    private CookieCollection _cookiePost;
    private WebHeaderCollection _customHeaders;
    private Encoding _encoding;
    private Version _httpVersion;
    private IWebProxy _Proxy;
    private string _ResHtml;
    private string _strAcceptType;
    private string _strCode;
    private string _strErr;
    private string _strPostdata;
    private string _strRefUrl;
    private string _strStatusDes;
    private string _strUrl;
    private string _strUserAgent;
    private bool _succeed;
    private int _timeOut;

    /// <summary>
    /// Creates a request description with UTF-8 encoding, an "*/*" Accept header,
    /// an MSIE 6 user agent, automatic redirects enabled and HTTP/1.1.
    /// </summary>
    /// <param name="strURL">Target URL.</param>
    /// <param name="PostData">Form body to send; null or empty means no body is sent.</param>
    public HttpProc(string strURL, string PostData)
    {
        this._encoding = Encoding.UTF8;
        this._strAcceptType = "*/*";
        this._strUserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; .NET CLR 1.1.4322)";
        this._allowAutoRedirect = true;
        this._httpVersion = HttpVersion.Version11;
        this._customHeaders = new WebHeaderCollection();
        this._strUrl = strURL;
        this._strPostdata = PostData;
    }

    /// <summary>
    /// Builds the <see cref="HttpWebRequest"/> from the current property values and,
    /// for a POST, writes the body to the request stream.
    /// </summary>
    /// <returns>The configured request, ready for <c>GetResponse()</c>.</returns>
    private HttpWebRequest CreateRequest()
    {
        // Formatter is declared elsewhere in the project; presumably it percent-encodes
        // non-ASCII (Chinese) characters in the URL - TODO confirm. Note that the encoded
        // result is written back to _strUrl.
        this._strUrl = Formatter.CnUrlEncode(this.strUrl);

        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(this._strUrl);

        // Custom headers must be assigned first: Referer/Accept/User-Agent below are
        // stored in the same header collection and would be lost otherwise.
        if (this._customHeaders != null)
        {
            request.Headers = this._customHeaders;
        }
        request.Headers.Add("Accept-Encoding: gzip, deflate");
        request.CookieContainer = new CookieContainer();
        request.Referer = this.strRefUrl ?? this.strUrl;
        request.Accept = this.strAcceptType;
        request.ProtocolVersion = this.httpVersion;
        request.UserAgent = this.strUserAgent;
        request.AllowAutoRedirect = this.allowAutoRedirect;

        // Honour the Proxy property; when it is null the request keeps its default proxy.
        if (this._Proxy != null)
        {
            request.Proxy = this._Proxy;
        }

        if (this._timeOut > 0)
        {
            request.Timeout = this._timeOut;
            request.ReadWriteTimeout = this._timeOut;
        }

        if (this._cookiePost != null)
        {
            // CookieContainer.Add needs a domain on every cookie, so each outgoing
            // cookie is bound to the host of the target URL.
            Uri uri = new Uri(this._strUrl);
            foreach (Cookie cookie in this._cookiePost)
            {
                cookie.Domain = uri.Host;
            }
            request.CookieContainer.Add(this._cookiePost);
        }

        if (!string.IsNullOrEmpty(this._strPostdata))
        {
            request.ContentType = "application/x-www-form-urlencoded";
            request.Method = "POST";
            byte[] bytes = this._encoding.GetBytes(this._strPostdata);
            request.ContentLength = bytes.Length;
            try
            {
                using (Stream requestStream = request.GetRequestStream())
                {
                    requestStream.Write(bytes, 0, bytes.Length);
                }
            }
            catch (Exception exception)
            {
                // Best effort, as before: record the error and still hand the request
                // back to the caller.
                this._strErr = exception.Message;
            }
        }

        return request;
    }

    /// <summary>
    /// Sends the request and reads the whole response body as text.
    /// </summary>
    /// <returns>
    /// The response body decoded with <see cref="encoding"/>; the value of the
    /// <c>Location</c> header when the status code is 302; or an empty string when
    /// sending or reading fails (see <see cref="strErr"/>).
    /// </returns>
    public string Proc()
    {
        this._succeed = false;
        HttpWebRequest request = this.CreateRequest();
        HttpWebResponse response = null;
        StreamReader reader = null;
        try
        {
            response = (HttpWebResponse)request.GetResponse();

            // The request advertises "gzip, deflate", so both encodings are decoded here.
            Stream body = response.GetResponseStream();
            string contentEncoding = response.Headers["Content-Encoding"];
            if (contentEncoding != null)
            {
                if (contentEncoding.IndexOf("gzip", StringComparison.OrdinalIgnoreCase) > -1)
                {
                    body = new GZipStream(body, CompressionMode.Decompress);
                }
                else if (contentEncoding.IndexOf("deflate", StringComparison.OrdinalIgnoreCase) > -1)
                {
                    // NOTE(review): DeflateStream reads raw deflate data; a server that
                    // sends zlib-wrapped "deflate" will fail here and land in the catch.
                    body = new DeflateStream(body, CompressionMode.Decompress);
                }
            }

            reader = new StreamReader(body, this.encoding);
            this._ResHtml = reader.ReadToEnd();

            // strCode keeps the enum name (e.g. "OK") that existing callers already see.
            this._strCode = response.StatusCode.ToString();
            this._strStatusDes = response.StatusDescription;

            // Compare the numeric code: the enum name is never the string "302".
            if ((int)response.StatusCode == 302)
            {
                this._ResHtml = response.Headers["location"];
            }

            if (response.Cookies.Count > 0)
            {
                this._cookieGet = response.Cookies;
            }

            this._succeed = true;
        }
        catch (Exception exception)
        {
            this._strErr = exception.Message;
            return "";
        }
        finally
        {
            if (reader != null)
            {
                reader.Close();
            }
            // Close the response as well so it is released even when the failure
            // happened before a reader was created.
            if (response != null)
            {
                response.Close();
            }
        }

        return this.ResHtml;
    }

    /// <summary>
    /// Parses a "name=value; name2=value2" cookie string into a collection.
    /// Segments without a '=' after the first character are skipped.
    /// </summary>
    /// <param name="strCookies">Cookie string with pairs separated by ';'.</param>
    /// <returns>The parsed cookies, or null when the input is null or empty.</returns>
    public static CookieCollection CookiesStrToCollection(string strCookies)
    {
        if (string.IsNullOrEmpty(strCookies))
        {
            return null;
        }

        CookieCollection cookies = new CookieCollection();
        string[] pairs = strCookies.Trim().Split(';');
        foreach (string pair in pairs)
        {
            int separator = pair.IndexOf('=');
            if (separator > 0)
            {
                string name = pair.Substring(0, separator).Trim();
                string value = pair.Substring(separator + 1);
                cookies.Add(new Cookie(name, value));
            }
        }
        return cookies;
    }

    /// <summary>
    /// Value assigned to both <c>Timeout</c> and <c>ReadWriteTimeout</c> of the request;
    /// values of zero or less leave the request defaults untouched.
    /// </summary>
    public int Timeout
    {
        get { return this._timeOut; }
        set { this._timeOut = value; }
    }

    /// <summary>Whether the request follows redirects automatically.</summary>
    public bool allowAutoRedirect
    {
        get { return this._allowAutoRedirect; }
        set { this._allowAutoRedirect = value; }
    }

    /// <summary>Cookies from the last response that returned at least one cookie.</summary>
    public CookieCollection cookieGet
    {
        get { return this._cookieGet; }
        set { this._cookieGet = value; }
    }

    /// <summary>Cookies to send; their Domain is overwritten with the target host.</summary>
    public CookieCollection cookiePost
    {
        get { return this._cookiePost; }
        set { this._cookiePost = value; }
    }

    /// <summary>Extra headers assigned to the request before the standard ones.</summary>
    public WebHeaderCollection customHeaders
    {
        get { return this._customHeaders; }
        set { this._customHeaders = value; }
    }

    /// <summary>Encoding used for the POST body and for decoding the response text.</summary>
    public Encoding encoding
    {
        get { return this._encoding; }
        set { this._encoding = value; }
    }

    /// <summary>HTTP protocol version set on the request.</summary>
    public Version httpVersion
    {
        get { return this._httpVersion; }
        set { this._httpVersion = value; }
    }

    /// <summary>Proxy applied to the request when not null.</summary>
    public IWebProxy Proxy
    {
        get { return this._Proxy; }
        set { this._Proxy = value; }
    }

    /// <summary>Result text stored by the last successful <see cref="Proc"/> call.</summary>
    public string ResHtml
    {
        get { return this._ResHtml; }
    }

    /// <summary>Value of the Accept request header.</summary>
    public string strAcceptType
    {
        get { return this._strAcceptType; }
        set { this._strAcceptType = value; }
    }

    /// <summary>Name of the last response's <see cref="HttpStatusCode"/> value (e.g. "OK").</summary>
    public string strCode
    {
        get { return this._strCode; }
        set { this._strCode = value; }
    }

    /// <summary>Message of the most recent caught exception; not cleared on success.</summary>
    public string strErr
    {
        get { return this._strErr; }
        set { this._strErr = value; }
    }

    /// <summary>Form body to POST; null or empty means no body is sent.</summary>
    public string strPostdata
    {
        get { return this._strPostdata; }
        set { this._strPostdata = value; }
    }

    /// <summary>Referer header value; when null the target URL is used instead.</summary>
    public string strRefUrl
    {
        get { return this._strRefUrl; }
        set { this._strRefUrl = value; }
    }

    /// <summary>Status description of the last response.</summary>
    public string strStatusDes
    {
        get { return this._strStatusDes; }
        set { this._strStatusDes = value; }
    }

    /// <summary>Target URL; replaced by its encoded form when a request is built.</summary>
    public string strUrl
    {
        get { return this._strUrl; }
        set { this._strUrl = value; }
    }

    /// <summary>Value of the User-Agent request header.</summary>
    public string strUserAgent
    {
        get { return this._strUserAgent; }
        set { this._strUserAgent = value; }
    }

    /// <summary>True when the last <see cref="Proc"/> call read a response without an exception.</summary>
    public bool succeed
    {
        get { return this._succeed; }
        set { this._succeed = value; }
    }
}
}
------解决思路----------------------
仅有表面,没有内容的网站,任怎样做广告都卖不出去啊。