http://v.youku.com/v_playlist/f17252787o1p0.html#replyLocation
有一个优酷视频,想要获取该视频的评论,但是从页面源码中分析不到这些评论,评论被隐藏在一个 frame 框架中。
------解决方案--------------------
GET /comments/~ajax/vpcommentContent.html?
参数:__ap={"id":"XMzcyNjAwNzM2","sid":354909363,"page":4,"last_modify":1333031704}
__callback=displayComments
__ai=
------解决方案--------------------
- Java code
package other;import java.io.IOException;import java.util.regex.Matcher;import java.util.regex.Pattern;import org.apache.commons.httpclient.HttpClient;import org.apache.commons.httpclient.HttpException;import org.apache.commons.httpclient.methods.GetMethod;public class 获取视频评论 { public static String escape (String src) { int i; char j; StringBuffer tmp = new StringBuffer(); tmp.ensureCapacity(src.length()*6); for (i=0;i<src.length() ;i++ ) { j = src.charAt(i); if (Character.isDigit(j) || Character.isLowerCase(j) || Character.isUpperCase(j)) tmp.append(j); else if (j<256) { tmp.append( "%" ); if (j<16) tmp.append( "0" ); tmp.append( Integer.toString(j,16) ); } else { tmp.append( "%u" ); tmp.append( Integer.toString(j,16) ); } } return tmp.toString(); } public static String unescape (String src) { StringBuffer tmp = new StringBuffer(); tmp.ensureCapacity(src.length()); int lastPos=0,pos=0; char ch; while (lastPos<src.length()) { pos = src.indexOf("%",lastPos); if (pos == lastPos) { if (src.charAt(pos+1)=='u') { ch = (char)Integer.parseInt(src.substring(pos+2,pos+6),16); tmp.append(ch); lastPos = pos+6; } else { ch = (char)Integer.parseInt(src.substring(pos+1,pos+3),16); tmp.append(ch); lastPos = pos+3; } } else { if (pos == -1) { tmp.append(src.substring(lastPos)); lastPos=src.length(); } else { tmp.append(src.substring(lastPos,pos)); lastPos=pos; } } } return tmp.toString(); } public static void main(String[]args){ HttpClient client = new HttpClient(); GetMethod get = new GetMethod("http://v.youku.com/v_playlist/f17252787o1p0.html#replyLocation"); GetMethod get2 = new GetMethod("http://comments.youku.com/comments/~ajax/vpcommentContent.html?__ap=%7B%22id%22%3A%22XMzcyOTQ0MTY4%22%2C%22sid%22%3A355016202%2C%22page%22%3A2%2C%22last_modify%22%3A1333080602%7D&__ai=&__callback=displayComments"); Pattern pp = Pattern.compile("<p id=\\\"content_.*?>(.*?)<"); try { System.out.println(client.executeMethod(get)); System.out.println(client.executeMethod(get2)); String 
rsult=get2.getResponseBodyAsString(); rsult=rsult.replaceAll("\\\\\"","\""); Matcher mm = pp.matcher(rsult); while(mm.find()){ String yy=mm.group(1).replaceAll("\\\\","%"); System.out.println(unescape(yy)); } } catch (HttpException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } System.out.println(); }}