package my.Browser;
import java.awt.BorderLayout;
import java.awt.Dimension;
import java.awt.event.ActionEvent;
import java.awt.event.ActionListener;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URL;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.swing.BorderFactory;
import javax.swing.JButton;
import javax.swing.JFrame;
import javax.swing.JPanel;
import javax.swing.border.EmptyBorder;
import javax.swing.border.EtchedBorder;
import org.jdesktop.jdic.browser.*;
public class Urls {
private String startUrl; // URL at which collection starts
String urlContent;
// Text that ShowUrls() scans for anchor elements.
String ContentArea;
private String strAreaBegin, strAreaEnd; // strings marking the start and the end of the collection area
@SuppressWarnings("unused")
private String stringInUrl, stringNotInUrl;
String strContent;// the collected content
String[] allUrls; // all collected URLs
private String regex; // collection rule
// UrlAndTitle urlAndTitle = new UrlAndTitle(); // stores URLs and titles
// Top-level window; created in Kuangjia().
JFrame Zhankaiframe;
// Panel that Kuangjia() places in the NORTH region of Zhankaiframe.
JPanel jTianchong1 = new JPanel();
// Button whose border and maximum size are configured in Kuangjia().
JButton jButton1 = new JButton();
JPanel Panel1 = new JPanel();
JPanel Panel2 = new JPanel();
// NOTE(review): never assigned by ShowUrls(); presumably an index or count into buff — confirm against its users.
int j=0;
// Fixed-size store (50 slots) that ShowUrls() fills, from index 0, with the URLs it finds.
String[] buff=new String[50];
// Browser components, null until assigned elsewhere in the class (not in the visible portion).
WebBrowser Browser1 = null;
WebBrowser Browser2=null;
/**
 * Program entry point.
 *
 * <p>Creates a {@code Urls} instance whose collection area is delimited by
 * {@code "<body"} and {@code "/body>"}, then invokes its setup, collection and
 * display steps in a fixed order. The order is significant: {@code ShowUrls()}
 * reads {@code ContentArea}, which must have been populated beforehand.
 *
 * @param args command-line arguments; not used
 */
public static void main(String[] args)
{
    // The same address is used as the start page and as the "must contain" filter.
    final String homePage = "http://www.baidu.com/";

    Urls crawler = new Urls("<body", "/body>");

    // Configure the start page and load the region to scan.
    crawler.getStartUrl(homePage);
    crawler.getUrlContent();
    crawler.getContentArea();

    // URL filters.
    crawler.getStringInUrl(homePage);
    crawler.getStringNotInUrl("google");

    // Collect the hyperlink URLs into buff.
    crawler.ShowUrls();

    // Build the window, then run the two-browser step.
    crawler.Kuangjia();
    crawler.Twoweb();
}
/**
 * Creates an instance and records the two strings that delimit the collection area.
 *
 * @param strAreaBegin string marking where the collection area starts (e.g. {@code "<body"})
 * @param strAreaEnd   string marking where the collection area ends (e.g. {@code "/body>"})
 */
public Urls(String strAreaBegin, String strAreaEnd) {
    // The two assignments are independent of each other.
    this.strAreaEnd = strAreaEnd;
    this.strAreaBegin = strAreaBegin;
}
/**
 * Scans {@code ContentArea} for anchor elements and stores every absolute
 * http/https/ftp URL found inside them into {@code buff}, in order of appearance,
 * starting at index 0.
 *
 * <p>{@code buff} has a fixed capacity, so collection stops as soon as it is full
 * instead of writing past the end of the array. The method does nothing when
 * {@code ContentArea} or {@code buff} is {@code null}.
 *
 * <p>An anchor is matched with {@code <a.*?/a>}; because {@code .} does not match
 * line terminators by default, an anchor that spans several lines is not matched.
 */
public void ShowUrls()
{
    if (ContentArea == null || buff == null) {
        // Nothing to scan yet, or nowhere to store the results.
        return;
    }

    // Everything from "<a" up to the nearest following "/a>" (reluctant quantifier).
    final Pattern anchorPattern = Pattern.compile("<a.*?/a>");
    // Absolute URL with an http, https or ftp scheme. Compiled once, not per anchor.
    final Pattern urlPattern = Pattern.compile(
            "(http|ftp|https):\\/\\/[\\w\\-_]+(\\.[\\w\\-_]+)+([\\w\\-\\.,@?^=%&:/~\\+#]*[\\w\\-\\@?^=%&/~\\+#])?");

    final Matcher anchors = anchorPattern.matcher(ContentArea);
    int count = 0; // number of URLs stored so far; also the next free slot in buff

    // The capacity check comes first so that a full buffer ends the scan immediately.
    while (count < buff.length && anchors.find()) {
        final Matcher urls = urlPattern.matcher(anchors.group());
        while (count < buff.length && urls.find()) {
            // Strip any literal "href=" or ">" from the matched text before storing it.
            buff[count] = urls.group().replaceAll("href=|>", "");
            count++;
        }
    }
}
//初始化框架
public void Kuangjia(){
Zhankaiframe= new JFrame("NUPT WDQ WEBBROWSER 展开");
Zhankaiframe.setSize(900,900);
BorderLayout borderLayout1 = new BorderLayout();
jTianchong1.setLayout(new BorderLayout());
jTianchong1.setPreferredSize(new Dimension(40, 40));
Zhankaiframe.add(jTianchong1, BorderLayout.NORTH);
jButton1.setBorder(BorderFactory.createCompoundBorder(new EmptyBorder(0,
2, 0, 2),
new EtchedBorder()));
jButton1.setMaximumSize(new Dimension(60, 25));