如下,有这样的一堆链接,按照每行一个链接存储在txt文档中。
http://123.com/uiudoa=jf123&dfd=123456
http://123.com/doa=jf123&dfd=123456
http://baolaoda.com/jiioa123fd=123456
http://123.com/jiioa123fd=123456
http://123.com/jiioa123fd=123456
http://46.com/jiidoa=jf56
http://xxrbz.com/jiioa123fd=123456
http://liumangtu.com/jiioa123fd=123456
http://123.com/jiioa123fd=123456
首先,这些链接按照每行一个链接存储在txt文本文档中。
然后,导入txt文件,对这些链接做如下处理:
只要出现了相同的域名,就只保留其中任意一条,其他的全部删除。
最后,将处理后的结果保存。(也就是更新文本内容后保存)
PS:我是菜鸟级别,求各位大哥大姐详细点拨~~~Thanks very much!!
------解决方案--------------------
for example
- Java code
// Copies test.txt to test_new.txt, keeping only the first URL seen for each domain.
// Lines that are not http/https URLs are dropped. A leading "www." is ignored when
// comparing domains, so www.example.com and example.com count as the same domain.
//
// The path part is optional so that bare URLs such as "http://example.com" are kept
// too (previously a URL without a '/' after the host was silently discarded).
// The pattern is compiled once instead of twice per input line.
java.util.regex.Pattern urlPattern =
        java.util.regex.Pattern.compile("(?i)http[s]?[:]//(www[.])?([^/]+)(/.*)?");
Set<String> domain = new HashSet<String>();

BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream("test.txt")));
try {
    PrintStream ps = new PrintStream(new FileOutputStream("test_new.txt"));
    try {
        String buf;
        while ((buf = br.readLine()) != null) {
            java.util.regex.Matcher m = urlPattern.matcher(buf);
            if (!m.matches()) {
                continue; // not a URL line
            }
            // Set.add returns false when the domain was already recorded,
            // so only the first URL of each domain is written.
            if (domain.add(m.group(2))) {
                ps.println(buf);
            }
        }
    } finally {
        ps.close(); // closed even if reading fails half-way
    }
} finally {
    br.close();
}
------解决方案--------------------
完整的EX,利用HashMap,去掉重复的域名:
- Java code
import java.io.*;
import java.util.regex.*;
import java.util.*;

/**
 * Reads a text file containing one URL per line and writes a copy in which each
 * domain appears only once.
 *
 * <p>The first URL seen for a domain is the one that is kept, and the output
 * preserves the order of the input file.
 */
public class filetest1 {

    /**
     * Matches an http URL whose host ends in {@code name.name}; group 2 is the host.
     * The host group uses {@code [^/]*} rather than {@code .*} so that it can never
     * run past the first '/' and swallow part of the path (which would make two URLs
     * on the same host look like different domains).
     */
    private static final Pattern URL_PATTERN =
            Pattern.compile("(http://)([^/]*\\w+\\.\\w+)/.*");

    /**
     * Reads {@code d:\temp\t.txt}, removes URLs whose domain was already seen and
     * writes the remaining lines to {@code d:\temp\t1.txt}, one per line.
     * Lines that do not contain a matching URL are skipped. Both files are
     * read and written with the JVM's default charset.
     *
     * @param args unused
     * @throws IOException if the input cannot be read or the output cannot be written
     */
    public static void main(String[] args) throws IOException {
        // LinkedHashMap keeps insertion order, so the output order is deterministic.
        Map<String, String> urlByDomain = new LinkedHashMap<String, String>();

        BufferedReader br = new BufferedReader(
                new InputStreamReader(new FileInputStream("d:\\temp\\t.txt")));
        try {
            String data;
            while ((data = br.readLine()) != null) {
                Matcher matcher = URL_PATTERN.matcher(data);
                if (matcher.find()) {
                    String domain = matcher.group(2);
                    // Keep the first URL of each domain; later duplicates are ignored.
                    if (!urlByDomain.containsKey(domain)) {
                        urlByDomain.put(domain, data);
                    }
                }
            }
        } finally {
            br.close(); // release the file even when reading fails
        }

        FileWriter fw = new FileWriter("d:\\temp\\t1.txt");
        try {
            for (String url : urlByDomain.values()) {
                fw.write(url);
                fw.write("\n");
            }
        } finally {
            fw.close();
        }
    }
}
------解决方案--------------------
香香结贴吧,我写的这个,代码量很多啊
- Java code
package xiang;import java.io.BufferedReader;import java.io.BufferedWriter;import java.io.File;import java.io.FileInputStream;import java.io.FileNotFoundException;import java.io.FileOutputStream;import java.io.IOException;import java.io.InputStreamReader;import java.io.OutputStreamWriter;import java.io.PrintWriter;import java.nio.charset.Charset;import java.util.HashMap;import java.util.Iterator;import java.util.Map;import java.util.Set;public class Demo { public Map readFromTxt(String fileName) { File file = new File(fileName); // (一):建立【读】的管子 FileInputStream input = null; InputStreamReader insr = null; BufferedReader read = null; Map map = new HashMap();// 存储域名和链接的map String line = ""; try { input = new FileInputStream(file);// 读 insr = new InputStreamReader(input, Charset.forName("UTF-8"));// 按照UTF-8字符集的方式读,也可以不写Charset.forName("UTF-8"); read = new BufferedReader(insr); // (二):读取一行,也就是一行一行的读 line = read.readLine(); while (line != null) { // 如果line==null证明已经读完了,如果line!=null证明没有读完 // (二):读取一行,也就是一行一行的读 String realmName = getRealmName(line); if (map.size() == 0) { map.put(realmName, line); } else { // 遍历map,看map中是否已经存在这个域名。 Set set = map.keySet(); Iterator it = set.iterator(); String key = "";// 用来存域名 String value = "";// 用来存链接字符串 while (it.hasNext()) { String n = (String) it.next(); if (n.equals(realmName)) { break;// 如果已经存在,就跳出循环 } else { // 如果不存在,就把域名和这一行字符串赋值给key,value key = realmName; value = line; } } if (!"".equals(key) && !"".equals(value)) { map.put(key, value); key = ""; value = ""; } } line = read.readLine();// 继续读 } } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } finally { try { // (三):关闭流 if (input != null) { input.close(); } if (insr != null) { insr.close(); } if (read != null) { read.close(); } } catch (IOException e) { e.printStackTrace(); } } return map; } /** * 截取字符串中的域名 详细解析看RealmNameDemo * */ public String getRealmName(String line) { int begin = -1; int end = -1; int times = 1; for (int i 
= 0; i < line.length(); i++) { char c = line.charAt(i); if ('/' == c && times == 2) { begin = i; times++; } else if ('/' == c && times == 3) { end = i; break; } else if ('/' == c) { times++; } } String realmName = line.substring(begin + 1, end); return realmName; } /** * 往新的txt文件中写 * */ public void writeToTxt(String fileName, Map map) { File file = new File(fileName); if (file.exists()) {// 如果这个文件不存在,就新建一个 file.mkdirs(); } // (一):建立【写】的管子 FileOutputStream output = null; OutputStreamWriter outsw = null; BufferedWriter buffWriter = null; PrintWriter write = null; try { output = new FileOutputStream(file); outsw = new OutputStreamWriter(output); buffWriter = new BufferedWriter(outsw); write = new PrintWriter(buffWriter,true); Set set = map.keySet(); Iterator it = set.iterator(); while (it.hasNext()) { String key = (String) it.next(); String line = (String) map.get(key); write.println(line);// 写入 } write.flush();// 刷新下管道 } catch (FileNotFoundException e) { e.printStackTrace(); } finally { try { if (write != null) { write.close(); } if (buffWriter != null) { buffWriter.close(); } if (outsw != null) { outsw.close(); } if (output != null) { output.close(); } } catch (IOException e) { e.printStackTrace(); } } } public static void main(String[] args) { String readFileName = "F:\\MyWorkSpaces\\exerworkspace\\Struts2Web\\src\\xiang\\old.txt"; Demo demo = new Demo(); Map map = demo.readFromTxt(readFileName); String writeFileName = "F:\\MyWorkSpaces\\exerworkspace\\Struts2Web\\src\\xiang\\new.txt"; demo.writeToTxt(writeFileName, map); }}