地区代码已整理在附件 citys.xml 中。
说明:p 代表省,n 代表省名称(例如:B 北京),c 代表城市。
返回的json数据示例:
Java 代码
{"weatherinfo":{"city":"上海","city_en":"shanghai","date_y":"2010年5月31日","date":"庚寅年四月十八","week":"星期一","fchh":"08","cityid":"101020100","temp1":"27℃~18℃","temp2":"26℃~18℃","temp3":"27℃~19℃","temp4":"27℃~20℃","temp5":"25℃~20℃","tempF1":"80.6ㄈ~64.4ㄈ","tempF2":"78.8ㄈ~64.4ㄈ","tempF3":"80.6ㄈ~66.2ㄈ","tempF4":"80.6ㄈ~68ㄈ","tempF5":"77ㄈ~68ㄈ","weather1":"多云","weather2":"多云","weather3":"晴转多云","weather4":"多云","weather5":"阴","img1":"1","img2":"99","img3":"1","img4":"99","img5":"0","img6":"1","img7":"1","img8":"99","img9":"2","img10":"99","img_single":"1","img_title1":"多云","img_title2":"多云","img_title3":"多云","img_title4":"多云","img_title5":"晴","img_title6":"多云","img_title7":"多云","img_title8":"多云","img_title9":"阴","img_title10":"阴","img_title_single":"多云","wind1":"东风3-4级","wind2":"东风3-4级","wind3":"东风3-4级","wind4":"东南风4-5级","wind5":"东南风转东风4-5级","fx1":"东风","fx2":"东风","fl1":"3-4级","fl2":"3-4级","fl3":"3-4级","fl4":"4-5级","fl5":"4-5级","index":"暂缺","index_d":"暂缺","index48":"暂缺","index48_d":"暂缺","index_uv":"弱","index48_uv":"弱","index_xc":"适宜","index_tr":"很适宜","index_co":"较舒适","st1":"26","st2":"17","st3":"25","st4":"17","st5":"25","st6":"18"}}
{"weatherinfo":{"city":"上海","city_en":"shanghai","date_y":"2010年5月31日","date":"庚寅年四月十八","week":"星期一","fchh":"08","cityid":"101020100","temp1":"27℃~18℃","temp2":"26℃~18℃","temp3":"27℃~19℃","temp4":"27℃~20℃","temp5":"25℃~20℃","tempF1":"80.6ㄈ~64.4ㄈ","tempF2":"78.8ㄈ~64.4ㄈ","tempF3":"80.6ㄈ~66.2ㄈ","tempF4":"80.6ㄈ~68ㄈ","tempF5":"77ㄈ~68ㄈ","weather1":"多云","weather2":"多云","weather3":"晴转多云","weather4":"多云","weather5":"阴","img1":"1","img2":"99","img3":"1","img4":"99","img5":"0","img6":"1","img7":"1","img8":"99","img9":"2","img10":"99","img_single":"1","img_title1":"多云","img_title2":"多云","img_title3":"多云","img_title4":"多云","img_title5":"晴","img_title6":"多云","img_title7":"多云","img_title8":"多云","img_title9":"阴","img_title10":"阴","img_title_single":"多云","wind1":"东风3-4级","wind2":"东风3-4级","wind3":"东风3-4级","wind4":"东南风4-5级","wind5":"东南风转东风4-5级","fx1":"东风","fx2":"东风","fl1":"3-4级","fl2":"3-4级","fl3":"3-4级","fl4":"4-5级","fl5":"4-5级","index":"暂缺","index_d":"暂缺","index48":"暂缺","index48_d":"暂缺","index_uv":"弱","index48_uv":"弱","index_xc":"适宜","index_tr":"很适宜","index_co":"较舒适","st1":"26","st2":"17","st3":"25","st4":"17","st5":"25","st6":"18"}}
导航189天气抓取示例:
Crawler.java
Java 代码
import java.util.TimerTask;
import org.apache.commons.lang.StringUtils;
import org.htmlparser.Parser;
import org.htmlparser.beans.StringBean;
import org.htmlparser.lexer.Lexer;
/**
 * Base class for timer-driven crawlers. Provides helpers that reduce an HTML fragment to
 * plain text suitable for embedding in generated pages.
 */
public abstract class Crawler extends TimerTask {

    /**
     * Runs the given HTML through an htmlparser {@link Parser} with a {@link StringBean}
     * visitor and returns the strings the bean collected.
     *
     * @param html the HTML to convert; may be {@code null}
     * @return the collected text with carriage returns and tabs removed; the input unchanged
     *         if parsing throws; an empty string if {@code html} is {@code null}; or
     *         {@code null} if the bean produced no string
     */
    public String convert2String(String html) {
        if (html == null) {
            return "";
        }
        Parser parser = new Parser(new Lexer(html));
        StringBean collector = new StringBean();
        try {
            parser.visitAllNodesWith(collector);
        } catch (Exception e) {
            // Best effort: if the fragment cannot be parsed, hand back the raw input.
            return html;
        }
        String text = collector.getStrings();
        if (text != null) {
            text = text.replace("\r", "").replace("\t", "");
        }
        return text;
    }

    /**
     * Converts the given HTML to text via {@link #convert2String(String)}, then replaces
     * straight double quotes with a left curly quote, removes {@code <} characters and trims
     * the result.
     *
     * @param str the HTML to format; may be {@code null} or blank
     * @return the formatted text, or an empty string if the input is blank or no text could
     *         be extracted
     */
    public String formatString(String str) {
        if (StringUtils.isBlank(str)) {
            return "";
        }
        String text = convert2String(str);
        if (text == null) {
            // convert2String may return null; calling replace on it would throw.
            return "";
        }
        // NOTE(review): the original also ran replaceAll("&", "&"), a no-op, and applied the
        // quote replacement twice. These may be entity replacements that were decoded when the
        // code was published; confirm against the original source.
        return text.replace("\"", "“").replace("<", "").trim();
    }
}
import java.util.TimerTask;
import org.apache.commons.lang.StringUtils;
import org.htmlparser.Parser;
import org.htmlparser.beans.StringBean;
import org.htmlparser.lexer.Lexer;
/**
 * Base class for timer-driven crawlers. Provides helpers that reduce an HTML fragment to
 * plain text suitable for embedding in generated pages.
 */
public abstract class Crawler extends TimerTask {

    /**
     * Runs the given HTML through an htmlparser {@link Parser} with a {@link StringBean}
     * visitor and returns the strings the bean collected.
     *
     * @param html the HTML to convert; may be {@code null}
     * @return the collected text with carriage returns and tabs removed; the input unchanged
     *         if parsing throws; an empty string if {@code html} is {@code null}; or
     *         {@code null} if the bean produced no string
     */
    public String convert2String(String html) {
        if (html == null) {
            return "";
        }
        Parser parser = new Parser(new Lexer(html));
        StringBean collector = new StringBean();
        try {
            parser.visitAllNodesWith(collector);
        } catch (Exception e) {
            // Best effort: if the fragment cannot be parsed, hand back the raw input.
            return html;
        }
        String text = collector.getStrings();
        if (text != null) {
            text = text.replace("\r", "").replace("\t", "");
        }
        return text;
    }

    /**
     * Converts the given HTML to text via {@link #convert2String(String)}, then replaces
     * straight double quotes with a left curly quote, removes {@code <} characters and trims
     * the result.
     *
     * @param str the HTML to format; may be {@code null} or blank
     * @return the formatted text, or an empty string if the input is blank or no text could
     *         be extracted
     */
    public String formatString(String str) {
        if (StringUtils.isBlank(str)) {
            return "";
        }
        String text = convert2String(str);
        if (text == null) {
            // convert2String may return null; calling replace on it would throw.
            return "";
        }
        // NOTE(review): the original also ran replaceAll("&", "&"), a no-op, and applied the
        // quote replacement twice. These may be entity replacements that were decoded when the
        // code was published; confirm against the original source.
        return text.replace("\"", "“").replace("<", "").trim();
    }
}
WeatherCrawler.java
Java 代码
import java.net.URI;
import java.util.Date;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.params.HttpConnectionParams;
import org.apache.http.params.HttpProtocolParams;
import org.apache.http.util.EntityUtils;
import org.json.JSONException;
import org.json.JSONObject;
import com.dh189.util.Constants;
import com.dh189.util.Util;
/**
 * Crawler that downloads the JSON forecast for each configured city code and writes a small
 * HTML snippet (today and tomorrow) to a per-city file.
 */
public class WeatherCrawler extends Crawler {

    /** Base URL of the JSON feed; the city code and ".html" are appended. */
    private static final String GETURL = "http://m.weather.com.cn/data/";

    // Not referenced by any method in this class: getWeather creates its own client per
    // request. Kept so that code elsewhere that may read this field keeps compiling.
    DefaultHttpClient httpclient = new DefaultHttpClient();

    /**
     * Fetches the forecast for every city code in {@code Constants.WEATHERCITYCODE}
     * (comma-separated) and writes the rendered snippet to
     * {@code Constants.WEATHERDIR + city + ".html"}.
     *
     * <p>A city whose download fails or whose JSON is malformed is skipped, so one bad city
     * does not stop the remaining ones.
     */
    public void start() {
        String[] citys = Constants.WEATHERCITYCODE.split(",");
        for (String city : citys) {
            String val = getWeather(GETURL + city + ".html");
            if (val == null || val.trim().length() == 0) {
                // getWeather returns null on failure; handing null to JSONObject would raise
                // an exception that the JSONException handler below does not catch.
                System.err.println("No weather data received for city " + city + "; skipping");
                continue;
            }
            try {
                JSONObject j = new JSONObject(val).getJSONObject("weatherinfo");
                String dir = Constants.WEATHERDIR + city + ".html";
                Util.writeSortFile(dir, buildHtml(city, j), false);
            } catch (JSONException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Renders the HTML snippet for one city from its "weatherinfo" JSON object.
     *
     * @param city the city code, used in the detail-page links
     * @param j    the "weatherinfo" object of the feed response
     * @return the HTML snippet
     * @throws JSONException if an expected key is missing from {@code j}
     */
    private String buildHtml(String city, JSONObject j) throws JSONException {
        String alt1 = j.getString("weather1") + " " + j.getString("wind1");
        String alt2 = j.getString("weather2") + " " + j.getString("wind2");
        String link = "<a href=\"http://www.weather.com.cn/html/weather/" + city + ".shtml\" title=\"";

        // The closing tags after the city table and the customise link were "< /div>" (with a
        // space), which is not a valid end tag; they are emitted as "</div>" here.
        return "<div class=\"w_l\"> <div class=\"w_l_u\"> <table style=\"width: 35px;height: 45px;\"> <tr> <td style=\"color: #F24F00\"><b>"
                + j.getString("city")
                + "</b></td> </tr> </table> </div> <div class=\"w_l_d\"><a href=\"javascript:dingCity(); \" title=\"定制天气预报\" target=\"_self\">定制</a> </div> </div> <ul> <li class=\"w_r\" style=\"width: 222px;\"> <table style=\"height: 70px;width: 100%;float: left;\"> <tr valign=\"bottom\"> <td><img src=\"images/weather/b"
                + j.getString("img1") + ".gif\" height=\"30\"/></td> <td><img src=\"images/weather/b" + j.getString("img3")
                + ".gif\" height=\"30\"/></td> </tr> <tr> <td width=\"105\" style=\"line-height: 15px;\">" + link
                + alt1 + "\">" + Util.getWeekOfDate(new Date()) + ":" + j.getString("temp1") + "<br/>" + j.getString("weather1")
                + "</a></td> <td width=\"105\" style=\"line-height: 15px;\">" + link + alt2 + "\">"
                + Util.getWeekOfDate(Util.dateAddOrReduce(new Date(), 1)) + ":" + j.getString("temp2") + "<br/>" + j.getString("weather2") + "</a></td></tr></table></li></ul>";
    }

    /**
     * Performs a GET request and returns the whole response body.
     *
     * @param url the URL to fetch
     * @return the response body, or {@code null} if the request failed or the response had no
     *         entity
     */
    public String getWeather(String url) {
        String content = null;
        DefaultHttpClient client = new DefaultHttpClient();
        try {
            HttpConnectionParams.setConnectionTimeout(client.getParams(), 30 * 1000);
            HttpConnectionParams.setSoTimeout(client.getParams(), 30 * 1000);
            // Masquerade as Firefox.
            HttpProtocolParams.setUserAgent(client.getParams(), "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9.1.9) Gecko/20100315 Firefox/3.5.9");
            HttpGet httpget = new HttpGet(new URI(url));
            HttpResponse response = client.execute(httpget);
            HttpEntity entity = response.getEntity();
            if (entity != null) {
                // Fall back to UTF-8 (not the library default, ISO-8859-1) when the response
                // does not declare a charset, so Chinese text is decoded correctly.
                content = EntityUtils.toString(entity, "UTF-8");
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Release the connection on every path, including failures.
            client.getConnectionManager().shutdown();
        }
        return content;
    }

    /** Timer entry point; delegates to {@link #start()}. */
    @Override
    public void run() {
        start();
    }
}
import java.net.URI;
import java.util.Date;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.params.HttpConnectionParams;
import org.apache.http.params.HttpProtocolParams;
import org.apache.http.util.EntityUtils;
import org.json.JSONException;
import org.json.JSONObject;
import com.dh189.util.Constants;
import com.dh189.util.Util;
/**
 * Crawler that downloads the JSON forecast for each configured city code and writes a small
 * HTML snippet (today and tomorrow) to a per-city file.
 */
public class WeatherCrawler extends Crawler {

    /** Base URL of the JSON feed; the city code and ".html" are appended. */
    private static final String GETURL = "http://m.weather.com.cn/data/";

    // Not referenced by any method in this class: getWeather creates its own client per
    // request. Kept so that code elsewhere that may read this field keeps compiling.
    DefaultHttpClient httpclient = new DefaultHttpClient();

    /**
     * Fetches the forecast for every city code in {@code Constants.WEATHERCITYCODE}
     * (comma-separated) and writes the rendered snippet to
     * {@code Constants.WEATHERDIR + city + ".html"}.
     *
     * <p>A city whose download fails or whose JSON is malformed is skipped, so one bad city
     * does not stop the remaining ones.
     */
    public void start() {
        String[] citys = Constants.WEATHERCITYCODE.split(",");
        for (String city : citys) {
            String val = getWeather(GETURL + city + ".html");
            if (val == null || val.trim().length() == 0) {
                // getWeather returns null on failure; handing null to JSONObject would raise
                // an exception that the JSONException handler below does not catch.
                System.err.println("No weather data received for city " + city + "; skipping");
                continue;
            }
            try {
                JSONObject j = new JSONObject(val).getJSONObject("weatherinfo");
                String dir = Constants.WEATHERDIR + city + ".html";
                Util.writeSortFile(dir, buildHtml(city, j), false);
            } catch (JSONException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Renders the HTML snippet for one city from its "weatherinfo" JSON object.
     *
     * @param city the city code, used in the detail-page links
     * @param j    the "weatherinfo" object of the feed response
     * @return the HTML snippet
     * @throws JSONException if an expected key is missing from {@code j}
     */
    private String buildHtml(String city, JSONObject j) throws JSONException {
        String alt1 = j.getString("weather1") + " " + j.getString("wind1");
        String alt2 = j.getString("weather2") + " " + j.getString("wind2");
        String link = "<a href=\"http://www.weather.com.cn/html/weather/" + city + ".shtml\" title=\"";

        return "<div class=\"w_l\"> <div class=\"w_l_u\"> <table style=\"width: 35px;height: 45px;\"> <tr> <td style=\"color: #F24F00\"><b>"
                + j.getString("city")
                + "</b></td> </tr> </table> </div> <div class=\"w_l_d\"><a href=\"javascript:dingCity();\" title=\"定制天气预报\" target=\"_self\">定制</a> </div> </div> <ul> <li class=\"w_r\" style=\"width: 222px;\"> <table style=\"height: 70px;width: 100%;float: left;\"> <tr valign=\"bottom\"> <td><img src=\"images/weather/b"
                + j.getString("img1") + ".gif\" height=\"30\"/></td> <td><img src=\"images/weather/b" + j.getString("img3")
                + ".gif\" height=\"30\"/></td> </tr> <tr> <td width=\"105\" style=\"line-height: 15px;\">" + link
                + alt1 + "\">" + Util.getWeekOfDate(new Date()) + ":" + j.getString("temp1") + "<br/>" + j.getString("weather1")
                + "</a></td> <td width=\"105\" style=\"line-height: 15px;\">" + link + alt2 + "\">"
                + Util.getWeekOfDate(Util.dateAddOrReduce(new Date(), 1)) + ":" + j.getString("temp2") + "<br/>" + j.getString("weather2") + "</a></td></tr></table></li></ul>";
    }

    /**
     * Performs a GET request and returns the whole response body.
     *
     * @param url the URL to fetch
     * @return the response body, or {@code null} if the request failed or the response had no
     *         entity
     */
    public String getWeather(String url) {
        String content = null;
        DefaultHttpClient client = new DefaultHttpClient();
        try {
            HttpConnectionParams.setConnectionTimeout(client.getParams(), 30 * 1000);
            HttpConnectionParams.setSoTimeout(client.getParams(), 30 * 1000);
            // Masquerade as Firefox.
            HttpProtocolParams.setUserAgent(client.getParams(), "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9.1.9) Gecko/20100315 Firefox/3.5.9");
            HttpGet httpget = new HttpGet(new URI(url));
            HttpResponse response = client.execute(httpget);
            HttpEntity entity = response.getEntity();
            if (entity != null) {
                // Fall back to UTF-8 (not the library default, ISO-8859-1) when the response
                // does not declare a charset, so Chinese text is decoded correctly.
                content = EntityUtils.toString(entity, "UTF-8");
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Release the connection on every path, including failures.
            client.getConnectionManager().shutdown();
        }
        return content;
    }

    /** Timer entry point; delegates to {@link #start()}. */
    @Override
    public void run() {
        start();
    }
}