From: http://gcgmh.iteye.com/blog/474093
?
?
- import?java.net.URL;??
- ??
- import?junit.framework.TestCase;??
- ??
- import?org.apache.log4j.Logger;??
- import?org.htmlparser.Node;??
- import?org.htmlparser.NodeFilter;??
- import?org.htmlparser.Parser;??
- import?org.htmlparser.Tag;??
- import?org.htmlparser.beans.LinkBean;??
- import?org.htmlparser.filters.NodeClassFilter;??
- import?org.htmlparser.filters.OrFilter;??
- import?org.htmlparser.filters.TagNameFilter;??
- import?org.htmlparser.tags.HeadTag;??
- import?org.htmlparser.tags.ImageTag;??
- import?org.htmlparser.tags.InputTag;??
- import?org.htmlparser.tags.LinkTag;??
- import?org.htmlparser.tags.OptionTag;??
- import?org.htmlparser.tags.SelectTag;??
- import?org.htmlparser.tags.TableColumn;??
- import?org.htmlparser.tags.TableRow;??
- import?org.htmlparser.tags.TableTag;??
- import?org.htmlparser.tags.TitleTag;??
- import?org.htmlparser.util.NodeIterator;??
- import?org.htmlparser.util.NodeList;??
- import?org.htmlparser.util.ParserException;??
- import?org.htmlparser.visitors.HtmlPage;??
- import?org.htmlparser.visitors.NodeVisitor;??
- import?org.htmlparser.visitors.ObjectFindingVisitor;??
- ??
- public?class?T?extends?TestCase?{??
- ??
- ??private?static?final?Logger?logger?=?Logger.getLogger(T.class);??
- ??
/**
 * Creates the test case with the given name, passing it to the
 * {@link TestCase} constructor.
 *
 * @param name the test name handed to {@code super}
 */
public T(String name) {
  super(name);
}
- ??
- ??/*?
- ???*?测试ObjectFindVisitor的用法?
- ???*/??
- ??public?void?testImageVisitor()?{??
- ????try?{??
- ??????ImageTag?imgLink;??
- ??????ObjectFindingVisitor?visitor?=?new?ObjectFindingVisitor(ImageTag.class);??
- ??????Parser?parser?=?new?Parser();??
- ??????parser.setURL("http://www.google.com");??
- ??????parser.setEncoding(parser.getEncoding());??
- ??????parser.visitAllNodesWith(visitor);??
- ??????Node[]?nodes?=?visitor.getTags();??
- ??????for?(int?i?=?0;?i?<?nodes.length;?i++)?{??
- ????????imgLink?=?(ImageTag)?nodes[i];??
- ????????logger.fatal("testImageVisitor()?ImageURL?=?"?+?imgLink.getImageURL());??
- ????????logger.fatal("testImageVisitor()?ImageLocation?=?"?+?imgLink.extractImageLocn());??
- ????????logger.fatal("testImageVisitor()?SRC?=?"?+?imgLink.getAttribute("SRC"));??
- ??????}??
- ????}?catch?(Exception?e)?{??
- ??????e.printStackTrace();??
- ????}??
- ??}??
- ??
- ??/*?
- ???*?测试TagNameFilter用法?
- ???*/??
- ??public?void?testNodeFilter()?{??
- ????try?{??
- ??????NodeFilter?filter?=?new?TagNameFilter("IMG");??
- ??????Parser?parser?=?new?Parser();??
- ??????parser.setURL("http://www.google.com");??
- ??????parser.setEncoding(parser.getEncoding());??
- ??????NodeList?list?=?parser.extractAllNodesThatMatch(filter);??
- ??????for?(int?i?=?0;?i?<?list.size();?i++)?{??
- ????????logger.fatal("testNodeFilter()?"?+?list.elementAt(i).toHtml());??
- ??????}??
- ????}?catch?(Exception?e)?{??
- ??????e.printStackTrace();??
- ????}??
- ??
- ??}??
- ??
- ??/*?
- ???*?测试NodeClassFilter用法?
- ???*/??
- ??public?void?testLinkTag()?{??
- ????try?{??
- ??
- ??????NodeFilter?filter?=?new?NodeClassFilter(LinkTag.class);??
- ??????Parser?parser?=?new?Parser();??
- ??????parser.setURL("http://www.google.com");??
- ??????parser.setEncoding(parser.getEncoding());??
- ??????NodeList?list?=?parser.extractAllNodesThatMatch(filter);??
- ??????for?(int?i?=?0;?i?<?list.size();?i++)?{??
- ????????LinkTag?node?=?(LinkTag)?list.elementAt(i);??
- ????????logger.fatal("testLinkTag()?Link?is?:"?+?node.extractLink());??
- ??????}??
- ????}?catch?(Exception?e)?{??
- ??????e.printStackTrace();??
- ????}??
- ??
- ??}??
- ??
- ??/*?
- ???*?测试<link?href="?text=’text/css’?rel=’stylesheet’?/>用法?
- ???*/??
- ??public?void?testLinkCSS()?{??
- ????try?{??
- ??
- ??????Parser?parser?=?new?Parser();??
- ??????parser.setInputHTML("<head><title>Link?Test</title>"??
- ??????????+?"<link?href=’/test01/css.css'?text='text/css'?rel='stylesheet'?/>"??
- ??????????+?"<link?href='/test02/css.css'?text='text/css'?rel='stylesheet'?/>"?+?"</head>"??
- ??????????+?"<body>");??
- ??????parser.setEncoding(parser.getEncoding());??
- ??
- ??????for?(NodeIterator?e?=?parser.elements();?e.hasMoreNodes();)?{??
- ????????Node?node?=?e.nextNode();??
- ????????logger.fatal("testLinkCSS()"?+?node.getText()?+?node.getClass());??
- ??
- ??????}??
- ????}?catch?(Exception?e)?{??
- ??????e.printStackTrace();??
- ????}??
- ??}??
- ??
- ??/*?
- ???*?测试OrFilter的用法?
- ???*/??
- ??public?void?testOrFilter()?{??
- ????NodeFilter?inputFilter?=?new?NodeClassFilter(InputTag.class);??
- ????NodeFilter?selectFilter?=?new?NodeClassFilter(SelectTag.class);??
- ??
- ????NodeList?nodeList?=?null;??
- ??
- ????try?{??
- ??????Parser?parser?=?new?Parser();??
- ??????parser??
- ??????????.setInputHTML("<head><title>OrFilter?Test</title>"??
- ??????????????+?"<link?href='/test01/css.css'?text='text/css'?rel='stylesheet'?/>"??
- ??????????????+?"<link?href='/test02/css.css'?text='text/css'?rel='stylesheet'?/>"??
- font-size: 1em; margin-top: 0px; margin-righ
1 楼 满月无双 2011-08-10
testLinkCSS()方法怎么取出 <link href属性的值呢?

2 楼 zxhDaniel 2011-08-13
满月无双 写道:testLinkCSS()方法怎么取出 <link href属性的值呢?
你可以拿到那个link这个节点的对象Node,然后再用getAttribute()这种类似的方法取出来。