大家帮我看一下这个代码:
import java.io.IOException;
import org.apache.commons.httpclient.*;
import org.apache.commons.httpclient.methods.GetMethod;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.httpclient.HttpClient;
public class getCode{
public static String readHtml(HttpClient client,String url) throws IOException {
StringBuffer a=new StringBuffer();
String htmlFile = " ";
File f1=new File( "E:\\jsp\\test.txt ");
if(!f1.exists()){ //检查文件是否存在
System.out.print( "没有相应的文件 ");
try
{
f1.createNewFile();
} catch (IOException e)
{
e.printStackTrace();
}
}
FileWriter fs=new FileWriter(f1);
int i=0;
String str = null;
GetMethod get = new GetMethod(url);
try {
client.executeMethod(get);
str = get.getResponseBodyAsString();
}
catch (HttpException e) {
e.printStackTrace();
}
catch (IOException e) {
e.printStackTrace();
}
get.releaseConnection();
fs.write(str);
BufferedReader reader=new BufferedReader(new FileReader( "E:\\jsp\\test.txt "));
String line=reader.readLine();
while(line!=null){
line = reader.readLine();
String r= "(\\r|\\n|\\s+ <td align=\ "center\ "\\s+> )(.*)( </td> |\\n|\\r) ";
String r1= "(.*共 <span class=\ "text_orange\ "> )(.*)( </span> 页.*) ";
Pattern s=Pattern.compile(r);