最近要用Java提取html表单元素,学习了使用htmlparser提取表单元素的一些简单常用的方法,在此总结一下!
第一步:读取指定的html文件
/**
 * Reads a whole text file and returns its decoded content.
 *
 * <p>Only the file content is returned. Earlier revisions appended the content
 * to the {@code filepaths} argument, so the result started with the path itself,
 * and they dropped every line terminator, which could glue together HTML tokens
 * that were split across lines. The content is now returned exactly as decoded.
 *
 * <p>NOTE(review): the charset name {@code "unicode"} is kept from the original
 * code; the JDK resolves it as an alias of UTF-16. Confirm that the input files
 * are really stored in that encoding.
 *
 * @param filepaths path of the local file to read
 * @return the decoded content of the file
 * @throws IOException if the file cannot be opened or read; a missing file is
 *         reported as {@link FileNotFoundException} instead of being swallowed
 * @throws ParserException never thrown by this implementation; kept in the
 *         signature so existing callers keep compiling
 */
public static String ReadFile(String filepaths) throws IOException, ParserException
{
    StringBuilder content = new StringBuilder();
    FileInputStream in = new FileInputStream(new File(filepaths));
    try {
        BufferedReader reader = new BufferedReader(new InputStreamReader(in, "unicode"));
        char[] buffer = new char[4096];
        int count;
        // Read raw characters instead of lines so line terminators are preserved.
        while ((count = reader.read(buffer)) != -1) {
            content.append(buffer, 0, count);
        }
    } finally {
        // Closing the underlying stream releases the file handle even when
        // decoding or reading fails part-way through.
        in.close();
    }
    return content.toString();
}
第二步:下载辅助工具包htmllexer.jar和htmlparser.jar,并导入项目
第三步:定义用来保存所提取信息的类
/**
 * Example value class holding the attributes read from one text-box
 * {@code <input>} tag.
 *
 * <p>All fields are private and exposed through plain getters and setters.
 */
public class InputT {
    /** Value of the tag's {@code id} attribute. */
    private String id;
    /** Value of the tag's {@code name} attribute. */
    private String name;
    /** Value of the tag's {@code onkeydown} attribute. */
    private String onkeydown;
    /** Value of the tag's {@code PropBindings} attribute. */
    private String poppding;

    /** @return the stored id, or {@code null} if none was set */
    public String getId() {
        return id;
    }

    /** @param id the id to store */
    public void setId(String id) {
        this.id = id;
    }

    /** @return the stored name, or {@code null} if none was set */
    public String getName() {
        return name;
    }

    /** @param name the name to store */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the stored onkeydown handler, or {@code null} if none was set */
    public String getOnkeydown() {
        return onkeydown;
    }

    /** @param onkeydown the onkeydown handler to store */
    public void setOnkeydown(String onkeydown) {
        this.onkeydown = onkeydown;
    }

    /** @return the stored PropBindings value, or {@code null} if none was set */
    public String getPoppding() {
        return poppding;
    }

    /** @param poppding the PropBindings value to store */
    public void setPoppding(String poppding) {
        this.poppding = poppding;
    }
}
第四步:将其存入map中
public class MapCollection {
public String id;
private InputT input;
Map<String,InputT> IMap=new HashMap<String, InputT>();
//以次将要用的的类封装到map中以便以后调用
public Map<String, InputT> getIMap() {
IMap.put(id,input);
return IMap;
}
public void setIMap(Map<String, InputT> iMap) {
IMap = iMap;
}
}
}
第五步:获取想要的信息
package com.text;
import java.util.HashMap;
import java.util.Map;
import org.htmlparser.NodeFilter;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.tags.Div;
import org.htmlparser.tags.ImageTag;
import org.htmlparser.tags.InputTag;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.tags.SelectTag;
import org.htmlparser.tags.TextareaTag;
import org.htmlparser.util.NodeList;
public class SplitToHtml {
// Source of the lookup maps below; every map except Smap is obtained from it.
MapCollection mc=new MapCollection();
// Buttons; filled by getSpitInput, keyed by the value of Button.getId().
Map<String,Button> map= mc.getMap();
// Created locally instead of being taken from mc; not used by getSpitInput.
Map<String,SelectT> Smap=new HashMap<String,SelectT>();
// Calendar controls; filled by getSpitInput, keyed by the value of RiliT.getId().
Map<String,RiliT> Rmap=mc.getRmap();
// Plain text boxes; filled by getSpitInput, keyed by the value of InputT.getId().
Map<String,InputT> input=mc.getIMap();
// Text boxes with an onkeydown handler; filled by getSpitInput, keyed by the value of Inputdown.getId().
Map<String,Inputdown> inputDw=mc.getIDmap();
// The remaining maps are not touched by getSpitInput; presumably they are
// filled by sibling methods outside this excerpt -- TODO confirm.
Map<String,Image> ImgMap=mc.getImgMap();
Map<String,TextAreaT> TAMap=mc.getTAMap();
Map<String,Divs> divMap=mc.getDivMap();
Map<String,Alink> AMap=mc.getAMap();
/**
 * Collects every {@code <input>} tag found through
 * {@code Parsers.getParsers(cssandtitle)} and files it into one of the lookup
 * maps according to which attributes the tag carries:
 * <ul>
 *   <li>no {@code PropBindings}, but {@code onclick} and {@code expression}:
 *       button, stored in {@code map};</li>
 *   <li>{@code PropBindings} and {@code onclick}: calendar control, stored in
 *       {@code Rmap};</li>
 *   <li>{@code PropBindings} without {@code onclick} and {@code onkeydown}:
 *       plain text box, stored in {@code input};</li>
 *   <li>{@code PropBindings} and {@code onkeydown}: text box with a key
 *       handler, stored in {@code inputDw}.</li>
 * </ul>
 * A tag with {@code PropBindings}, {@code onclick} and {@code onkeydown} is
 * stored both as a calendar control and as a text box with a key handler.
 *
 * @param cssandtitle passed unchanged to {@code Parsers.getParsers};
 *        presumably the HTML text to scan -- confirm against that helper
 */
public void getSpitInput(String cssandtitle)
{
    NodeFilter inputFilter = new TagNameFilter("input");
    // Recursive search for all nodes accepted by the filter.
    NodeList matches = Parsers.getParsers(cssandtitle).extractAllNodesThatMatch(inputFilter, true);
    System.out.println("匹配节点个数:" + matches.size());

    for (int idx = 0; idx < matches.size(); idx++)
    {
        InputTag tag = (InputTag) matches.elementAt(idx);

        // Snapshot the attributes up front; the button branch rewrites some of
        // them on the tag afterwards.
        String id = tag.getAttribute("id");
        String onclick = tag.getAttribute("onclick");
        String binding = tag.getAttribute("PropBindings");
        String name = tag.getAttribute("name");
        String onkeydown = tag.getAttribute("onkeydown");
        // Read as in the original code, but no branch below looks at it.
        String type = tag.getAttribute("type");
        String expression = tag.getAttribute("expression");

        boolean hasClick = onclick != null;
        boolean hasKeydown = onkeydown != null;

        if (binding == null)
        {
            // Without PropBindings only the button case can apply.
            if (hasClick && expression != null)
            {
                Button button = new Button();
                button.setExpression(expression);
                button.setId(id);
                button.setName(name);
                button.setOnclick(onclick);
                String buttonKey = button.getId();
                // Overwrite the tag's id and onclick with placeholder markers.
                tag.setAttribute("id","<<<<<<<<<<<<<<<<<<");
                tag.setAttribute("onclick", "__________________");
                map.put(buttonKey, button);
            }
            continue;
        }

        // From here on the tag carries PropBindings.
        if (hasClick)
        {
            // Calendar control.
            RiliT calendar = new RiliT();
            calendar.setId(id);
            calendar.setName(name);
            calendar.setOnclick(onclick);
            Rmap.put(calendar.getId(), calendar);
        }
        else if (!hasKeydown)
        {
            // Plain text box: neither onclick nor onkeydown.
            InputT textBox = new InputT();
            textBox.setId(id);
            textBox.setName(name);
            textBox.setPoppding(binding);
            input.put(textBox.getId(), textBox);
        }

        if (hasKeydown)
        {
            // Text box with a key handler (the original comment calls it the Enter-key event).
            Inputdown keyBox = new Inputdown();
            keyBox.setId(id);
            keyBox.setName(name);
            keyBox.setOnkeydown(onkeydown);
            inputDw.put(keyBox.getId(), keyBox);
        }
    }
}
第六步:调用此方法
/**
 * Demo entry point: reads the input with {@code ReadFile} and hands the result
 * to {@code SplitToHtml.getSplitToSelect}.
 *
 * <p>NOTE(review): the built-in default is an HTTP URL, while ReadFile opens its
 * argument through {@code java.io.File}; pass a local path as the first
 * argument to read an actual file. getSplitToSelect is not part of this
 * excerpt.
 *
 * @param args optional; when {@code args[0]} is present it replaces the
 *        built-in default as the path given to ReadFile
 * @throws IOException if ReadFile fails to read the input
 * @throws ParserException declared by ReadFile
 */
public static void main(String[] args) throws IOException, ParserException {
    String filepaths = "http://www.baidu.com/index.html";
    if (args != null && args.length > 0) {
        // Let the caller choose the input without editing the source.
        filepaths = args[0];
    }
    String cssandtitle = ReadFile(filepaths);
    SplitToHtml sph = new SplitToHtml();
    sph.getSplitToSelect(cssandtitle);
}