HttpClient :
?
?
最近需要抓取其他网站的数据,于是尝试了 HttpClient。

官网 http://hc.apache.org/ 提供了很详细的例子。
?
尝试连接的次数
?
HttpRequestRetryHandler myRetryHandler = new ?HttpRequestRetryHandler() {?
public boolean retryRequest(IOException exception, int executionCount,HttpContext context) {?
System.out.println("尝试连接次数:-------:"+executionCount);
if (executionCount >= 5) {?
// 如果超过最大重试次数,那么就不要继续了?
return false;?
}?
if (exception instanceof NoHttpResponseException) {?
// 如果服务器丢掉了连接,那么就重试?
return true;?
}?
if (exception instanceof SSLHandshakeException) {?
// 不要重试SSL握手异常?
return false;?
}?
?
HttpRequest request = (HttpRequest) context.getAttribute(
ExecutionContext.HTTP_REQUEST);?
boolean idempotent = !(request instanceof?
HttpEntityEnclosingRequest);?
if (idempotent) {?
// 如果请求被认为是幂等的,那么就重试?
return true;?
}
return false;?
}?
};?
?
?
?
GET:
?
@Test
public void test_OOC(){
DefaultHttpClient httpclient = new DefaultHttpClient();
try {
//设置
httpclient.setHttpRequestRetryHandler(myRetryHandler);?
List<NameValuePair> formparams = new?
ArrayList<NameValuePair>();?
formparams.add(new BasicNameValuePair("eltype", "ct"));?
formparams.add(new BasicNameValuePair("cont_no", "XXX"));?
URI uri = URIUtils.createURI("http", "www.XXXX.com", -1,
"XXXXXXX",?
URLEncodedUtils.format(formparams, "UTF-8"), null);?
HttpGet httpget = new HttpGet(uri);
System.out.println(httpget.getURI());
//获得String 的方法
ResponseHandler<String> responseHandler = new BasicResponseHandler();
String responseBody = httpclient.execute(httpget, responseHandler);
//处理HTML
//Document doc = Jsoup.parse(responseBody);
//Elements elements = doc.select("#printContent");
saveFile("OOCL",responseBody);
} catch (Exception e) {
e.printStackTrace();
} finally {
httpclient.getConnectionManager().shutdown();
}
}
?
?
?
Post:
?
// Form fields that make up the POST body.
List<NameValuePair> formparams = new ArrayList<NameValuePair>();
formparams.add(new BasicNameValuePair("userId", "null"));
formparams.add(new BasicNameValuePair("iId", "182000"));
formparams.add(new BasicNameValuePair("NameList", ""));
formparams.add(new BasicNameValuePair("type", "A"));
// URL-encode the fields as UTF-8.
UrlEncodedFormEntity urlEntity = new UrlEncodedFormEntity(formparams, "UTF-8");
HttpPost httppost = new HttpPost("xxxxxxxx");
// Attach the encoded form to the request; without this the POST carries no body.
httppost.setEntity(urlEntity);
?
?
?
?
?
public static void saveFile(String fileName,String data){
BufferedWriter bw= null;
try{
String ? path="C:\\Users\\Thomas\\Desktop\\Tracking\\"+fileName+".html";
File file = new File(path);
if(!file.exists()){
file.createNewFile();
}
bw = new BufferedWriter(new FileWriter(path));
bw.write(data) ;
}catch(Exception e){
e.printStackTrace();
}finally{
if(null!=bw){
try {
bw.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
}
?
?
?
JSOUP:
?
官网:https://jsoup.org

它的解析功能也很强大,用法类似 jQuery,比较好用。至于 HTML Parser,就没有尝试使用了。
?
?
POST:
?
// Send the two form fields with an HTTP POST and parse the response.
Document doc = Jsoup.connect("http://XXXXXXXp")
        .data("tr_num", "unit_no")
        .data("tf_bl_no", "XXXX")
        .post();
// Narrow the matched <table> elements to the one at index 2 (zero-based).
Elements elements = doc
        .select("table")
        .eq(2);
?
?
?
get:
?
// Request the tracking page with an HTTP GET, passing the container number
// as a request parameter, and parse the response.
Document doc = Jsoup
        .connect("http://www.cma-cgm.com/eBusiness/Tracking/Default.aspx")
        .data("ContNum", "CMAU5364714")
        .get();
?
?
?