// Reposted from:
// http://bbs.chinaunix.net/thread-387085-1-1.html
package com.util;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.IOException;
import java.util.Date;
/**
 * Utility that walks a directory tree and rewrites every <code>.js</code> file in place,
 * replacing escape sequences of the form <code>&#92;uXXXX</code> (backslash, 'u', four
 * hexadecimal digits) with the characters they denote. Also offers the inverse encoding
 * ({@link #gbEncoding(String)}) and a length-prefixed writer ({@link #writeUnicode}).
 */
public class Unicode {

    /** Directory processed by {@link #main} when no path is given on the command line. */
    private static final String DEFAULT_PATH = "E:\\taobao";

    /** Number of characters in one complete escape: backslash, 'u' and four hex digits. */
    private static final int ESCAPE_LENGTH = 6;

    /**
     * Command-line entry point.
     *
     * @param args optional; {@code args[0]} is the file or directory to process. When absent,
     *             the historical default directory is used.
     */
    public static void main(String[] args) {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PATH;
        Unicode unicode = new Unicode();
        unicode.toIndex(path);
    }

    /**
     * Processes the file or directory at the given path and prints a summary
     * (elapsed milliseconds and number of files rewritten) to standard output.
     *
     * @param path file or directory to process
     */
    public void toIndex(String path) {
        toIndex(new File(path));
    }

    /**
     * Processes the given file or directory and prints the elapsed time and file count.
     *
     * @param file file or directory to process
     */
    private void toIndex(File file) {
        long start = System.currentTimeMillis();
        int number = indexFiles(file);
        long elapsed = System.currentTimeMillis() - start;
        System.out.println("总共耗时" + elapsed + "毫秒");
        System.out.println("一共处理" + number + "个文件");
    }

    /**
     * Recursively walks {@code file}, rewriting each file whose path ends with ".js".
     *
     * @param file file or directory to visit
     * @return number of files that were successfully rewritten
     */
    private int indexFiles(File file) {
        if (file.isDirectory()) {
            File[] files = file.listFiles();
            if (files == null) {
                // listFiles() yields null when the directory cannot be listed
                // (I/O error, missing permission); skip it instead of crashing.
                System.err.println("无法读取目录" + file);
                return 0;
            }
            int num = 0;
            for (int i = 0; i < files.length; i++) {
                num += indexFiles(files[i]);
            }
            return num;
        }
        if (file.getPath().endsWith(".js")) {
            System.out.println("正在处理:" + file);
            return unicode(file.getAbsolutePath()) ? 1 : 0;
        }
        System.out.println("文件类型不支持" + file);
        return 0;
    }

    /**
     * Decodes the escapes in one file and writes the result back to the same path.
     * The file is only overwritten when it was read completely; a failed read must
     * never truncate the original.
     *
     * @param filePath path of the file to rewrite
     * @return {@code true} if the file was read and rewritten, {@code false} if reading failed
     */
    private boolean unicode(String filePath) {
        StringBuffer decoded = new StringBuffer();
        try {
            readDecoded(filePath, decoded);
        } catch (IOException e) {
            System.err.println("读取文件失败,已跳过:" + filePath + " (" + e + ")");
            return false;
        }
        findLog(filePath, decoded.toString());
        return true;
    }

    /**
     * Writes {@code logFill} to the file {@code logFile}, replacing any previous content.
     *
     * @param logFile path of the file to write
     * @param logFill text to write
     * @throws RuntimeException if the file cannot be opened, written or closed; the
     *                          underlying {@link IOException} is attached as the cause
     */
    public void findLog(String logFile, String logFill) {
        File file = new File(logFile);
        try {
            BufferedWriter out = new BufferedWriter(new FileWriter(file));
            try {
                out.write(logFill);
            } finally {
                // Close even when write() fails so the file handle is not leaked.
                out.close();
            }
        } catch (IOException ex) {
            throw new RuntimeException("文件读写错误", ex);
        }
    }

    /**
     * Reads the file line by line, decodes the escapes in every line and returns the
     * result. Each line is terminated with "\r\n" in the returned text, whatever line
     * terminator the file used.
     *
     * <p>Failures are reported on standard error and are not propagated; the text
     * decoded up to the point of failure (possibly empty) is returned.
     *
     * @param filepath path of the file to read
     * @return decoded file content
     */
    public String findAll(String filepath) {
        StringBuffer content = new StringBuffer();
        try {
            readDecoded(filepath, content);
        } catch (Exception e) {
            System.err.println(e.toString());
        }
        return content.toString();
    }

    /**
     * Appends the decoded lines of {@code filepath} to {@code out}, each followed by "\r\n".
     *
     * @param filepath path of the file to read
     * @param out      receives the decoded text
     * @throws IOException if the file cannot be opened, read or closed
     */
    private void readDecoded(String filepath, StringBuffer out) throws IOException {
        BufferedReader reader = new BufferedReader(new FileReader(filepath));
        try {
            String line = reader.readLine();
            while (line != null) {
                out.append(decodeUnicode(line).toString());
                out.append("\r\n");
                line = reader.readLine();
            }
        } finally {
            // Closing the BufferedReader also closes the wrapped FileReader.
            reader.close();
        }
    }

    /**
     * Replaces every escape of the form <code>&#92;uXXXX</code> in {@code dataStr} with the
     * character whose code unit is the hexadecimal value XXXX. All other text is copied
     * unchanged. A backslash-u that is not followed by exactly four ASCII hexadecimal
     * digits (truncated or non-hex) is left in the output as written.
     *
     * <p>Note: a backslash preceding the escape is not given any special meaning.
     *
     * @author javajohn
     * @param dataStr text to decode; may be {@code null}
     * @return buffer holding the decoded text; empty when {@code dataStr} is {@code null}
     */
    public static StringBuffer decodeUnicode(final String dataStr) {
        final StringBuffer buffer = new StringBuffer();
        if (dataStr == null) {
            return buffer;
        }
        final int length = dataStr.length();
        int pos = 0;
        while (pos < length) {
            int escape = dataStr.indexOf("\\u", pos);
            if (escape == -1) {
                // No further escapes: copy the remainder verbatim.
                buffer.append(dataStr.substring(pos));
                break;
            }
            buffer.append(dataStr.substring(pos, escape));
            int code = parseEscape(dataStr, escape);
            if (code >= 0) {
                buffer.append((char) code);
                pos = escape + ESCAPE_LENGTH;
            } else {
                // Malformed escape: keep the two marker characters and resume right
                // after them so a valid escape that follows is still decoded.
                buffer.append("\\u");
                pos = escape + 2;
            }
        }
        return buffer;
    }

    /**
     * Parses the four hex digits of the escape that begins at {@code start}.
     *
     * @param text  text containing the escape
     * @param start index of the backslash
     * @return the code unit value (0..0xFFFF), or -1 if the escape is truncated or
     *         contains a non-hex character
     */
    private static int parseEscape(String text, int start) {
        if (start + ESCAPE_LENGTH > text.length()) {
            return -1;
        }
        int code = 0;
        for (int i = start + 2; i < start + ESCAPE_LENGTH; i++) {
            int digit = hexValue(text.charAt(i));
            if (digit < 0) {
                return -1;
            }
            code = (code << 4) | digit;
        }
        return code;
    }

    /** Returns the value of an ASCII hexadecimal digit, or -1 if {@code c} is not one. */
    private static int hexValue(char c) {
        if (c >= '0' && c <= '9') {
            return c - '0';
        }
        if (c >= 'a' && c <= 'f') {
            return c - 'a' + 10;
        }
        if (c >= 'A' && c <= 'F') {
            return c - 'A' + 10;
        }
        return -1;
    }

    /**
     * Encodes {@code value} with {@link #gbEncoding(String)} and writes it to {@code out}
     * as a 4-byte length (via {@code writeInt}) followed by that many bytes. The encoded
     * text is pure ASCII, so it is converted to bytes as US-ASCII regardless of the
     * platform default charset. The length and the original value are also printed to
     * standard output.
     *
     * @param out   stream to write to
     * @param value text to encode and write
     * @throws RuntimeException if writing fails; the underlying {@link IOException} is
     *                          attached as the cause
     */
    public static void writeUnicode(final DataOutputStream out,
            final String value) {
        try {
            final String unicode = gbEncoding(value);
            final byte[] data = unicode.getBytes("US-ASCII");
            final int dataLength = data.length;
            System.out.println(" Data Length is: " + dataLength);
            System.out.println(" Data is: " + value);
            out.writeInt(dataLength); // write the byte length first
            out.write(data, 0, dataLength); // then the encoded string itself
        } catch (IOException e) {
            // A half-written record corrupts the stream; do not hide the failure.
            throw new RuntimeException("文件读写错误", e);
        }
    }

    /**
     * Encodes every char of {@code gbString} as backslash, 'u' and exactly four lowercase
     * hexadecimal digits (zero-padded), so the result can be decoded again by
     * {@link #decodeUnicode(String)}.
     *
     * @param gbString text to encode
     * @return the escaped text; empty for empty input
     * @throws NullPointerException if {@code gbString} is {@code null}
     */
    public static String gbEncoding(final String gbString) {
        final int length = gbString.length();
        final StringBuffer encoded = new StringBuffer(length * ESCAPE_LENGTH);
        for (int i = 0; i < length; i++) {
            String hex = Integer.toHexString(gbString.charAt(i));
            encoded.append("\\u");
            // Pad to four digits; a shorter escape would be invalid and undecodable.
            for (int pad = hex.length(); pad < 4; pad++) {
                encoded.append('0');
            }
            encoded.append(hex);
        }
        return encoded.toString();
    }
}