一个相似文本检测的问题 大神进来帮忙看看程序
程序有时报错有时正常 英文好像有点检测不了还有 我想加入 显示相同语句这一功能 大神能不能帮我加个检测相同语句的方法啊
程序代码:
package simil;
import java.awt.*;
import java.awt.event.*;
import java.io.*;
import java.util.*;
import java.util.List;
import java.util.regex.*;
import javax.swing.JOptionPane;
public class FileWindows extends Frame implements ActionListener {
float similarity;
String SFname = "";
String TFname = "";
FileDialog file_open;
DirPanel dirPanel;
MainPanel mainPanel;
Button taButton;
FileWindows(){
super("文档检测系统");
setLocation(300, 50);
setSize(600,500);
dirPanel = new DirPanel();
mainPanel = new MainPanel();
taButton = new Button("开始检测");
taButton.addActionListener(this);
dirPanel.SFbutton.addActionListener(this);
dirPanel.TFbutton.addActionListener(this);
add(dirPanel,BorderLayout.NORTH);
add(mainPanel,BorderLayout.CENTER);
add(taButton,BorderLayout.SOUTH);
setResizable(false);
setBackground(Color.GRAY);
setVisible(true);
validate();
addWindowListener(new WindowAdapter(){
public void windowClosing(WindowEvent e) {
setVisible(false);
System.exit(0);
}
});
file_open = new FileDialog(this,"打开文件对话框",FileDialog.LOAD);
file_open.addWindowListener(new WindowAdapter(){
public void windowClosing(WindowEvent e) {
file_open.setVisible(false);
}
});
}
public void actionPerformed(ActionEvent e) {
if(e.getSource()==dirPanel.SFbutton){
file_open.setVisible(true);
SFname = file_open.getDirectory()+file_open.getFile();
dirPanel.SFdir.setText(SFname);
}
else if(e.getSource()==dirPanel.TFbutton){
file_open.setVisible(true);
TFname = file_open.getDirectory();
dirPanel.TFdir.setText(file_open.getDirectory());
}
else if(e.getSource()==taButton){
try{
File sf = new File(dirPanel.SFdir.getText());
File tf = new File(dirPanel.TFdir.getText());
File[] tFiles = tf.listFiles();
mainPanel.ta1.setText("");
mainPanel.ta2.setText("");
File temp;
for(int i=0;i<tFiles.length;i++){
parse(sf,tFiles[i]);
}
for(int i=0;i<tFiles.length-1;i++){
for(int j=i+1;j<tFiles.length;j++){
if(parse(sf,tFiles[i])<parse(sf, tFiles[j])){
temp = tFiles[i];
tFiles[i] = tFiles[j];
tFiles[j] = temp;
}
}
}
for(int i=0;i<tFiles.length;i++){
mainPanel.ta1.append("\n"+"检测原文档 与 "+tFiles[i].getName()+" 的相似度:"+parse(sf,tFiles[i])+"%");
same(sf,tFiles[i]);
}
}catch(NullPointerException e1){
// ta.append("请选择文档");
JOptionPane.showMessageDialog(this, "请选择文档","提示对话框",JOptionPane.ERROR_MESSAGE);
// e1.printStackTrace();
}
}
}
public double parse(File sf,File tf) {
int TRUE = 0;
BufferedReader br = null;
String s ="";
String doc1 = "";
String doc2 = "";
try {
br = new BufferedReader(new FileReader(sf));
while((s = br.readLine())!=null){
doc1 =doc2 + s;
}
br = new BufferedReader(new FileReader(tf));
while((s = br.readLine())!=null){
doc2 =doc2 + s;
}
} catch (FileNotFoundException e) {
e.printStackTrace();
}catch (IOException e) {
e.printStackTrace();
}finally{
if(br!=null){
try {
br.close();
br = null;
} catch (IOException e) {
e.printStackTrace();
}
}
}
if (doc1 != null && doc1.trim().length() > 0 && doc2 != null
&& doc2.trim().length() > 0) {
Map<Integer, int[]> AlgorithmMap = new HashMap<Integer, int[]>();
for (int i = 0; i < doc1.length(); i++) {
char d1 = doc1.charAt(i);
if(isHanZi(d1)){
int charIndex = getGB2312Id(d1);
if(charIndex != -1){
int[] fq = AlgorithmMap.get(charIndex);
if(fq != null && fq.length == 2){
fq[0]++;
}else {
fq = new int[2];
fq[0] = 1;
fq[1] = 0;
AlgorithmMap.put(charIndex, fq);
}
}
}
}
for (int i = 0; i < doc2.length(); i++) {
char d2 = doc2.charAt(i);
if(isHanZi(d2)){
int charIndex = getGB2312Id(d2);
if(charIndex != -1){
int[] fq = AlgorithmMap.get(charIndex);
if(fq != null && fq.length == 2){
fq[1]++;
}else {
fq = new int[2];
fq[0] = 0;
fq[1] = 1;
AlgorithmMap.put(charIndex, fq);
}
}
}
}
Iterator<Integer> iterator = AlgorithmMap.keySet().iterator();
double sqdoc1 = 0;
double sqdoc2 = 0;
double denominator = 0;
while(iterator.hasNext()){
int[] c = AlgorithmMap.get(iterator.next());
denominator += c[0]*c[1];
sqdoc1 += c[0]*c[0];
sqdoc2 += c[1]*c[1];
}
return (denominator / Math.sqrt(sqdoc1*sqdoc2))*100;
} else {
throw new NullPointerException(
" the Document is null or have not chars!!");
}
}
public static boolean isHanZi(char ch) {
return (ch >= 0x4E00 && ch <= 0x9FA5);
}
public static short getGB2312Id(char ch) {
try {
byte[] buffer = Character.toString(ch).getBytes("GB2312");
if (buffer.length != 2) {
return -1;
}
int b0 = (int) (buffer[0] & 0x0FF) - 161;
int b1 = (int) (buffer[1] & 0x0FF) - 161;
return (short) (b0 * 94 + b1);
} catch (UnsupportedEncodingException e) {
e.printStackTrace();
}
return -1;
}
}
import java.awt.*;
import java.awt.event.*;
import java.io.*;
import java.util.*;
import java.util.List;
import java.util.regex.*;
import javax.swing.JOptionPane;
public class FileWindows extends Frame implements ActionListener {
float similarity;
String SFname = "";
String TFname = "";
FileDialog file_open;
DirPanel dirPanel;
MainPanel mainPanel;
Button taButton;
FileWindows(){
super("文档检测系统");
setLocation(300, 50);
setSize(600,500);
dirPanel = new DirPanel();
mainPanel = new MainPanel();
taButton = new Button("开始检测");
taButton.addActionListener(this);
dirPanel.SFbutton.addActionListener(this);
dirPanel.TFbutton.addActionListener(this);
add(dirPanel,BorderLayout.NORTH);
add(mainPanel,BorderLayout.CENTER);
add(taButton,BorderLayout.SOUTH);
setResizable(false);
setBackground(Color.GRAY);
setVisible(true);
validate();
addWindowListener(new WindowAdapter(){
public void windowClosing(WindowEvent e) {
setVisible(false);
System.exit(0);
}
});
file_open = new FileDialog(this,"打开文件对话框",FileDialog.LOAD);
file_open.addWindowListener(new WindowAdapter(){
public void windowClosing(WindowEvent e) {
file_open.setVisible(false);
}
});
}
public void actionPerformed(ActionEvent e) {
if(e.getSource()==dirPanel.SFbutton){
file_open.setVisible(true);
SFname = file_open.getDirectory()+file_open.getFile();
dirPanel.SFdir.setText(SFname);
}
else if(e.getSource()==dirPanel.TFbutton){
file_open.setVisible(true);
TFname = file_open.getDirectory();
dirPanel.TFdir.setText(file_open.getDirectory());
}
else if(e.getSource()==taButton){
try{
File sf = new File(dirPanel.SFdir.getText());
File tf = new File(dirPanel.TFdir.getText());
File[] tFiles = tf.listFiles();
mainPanel.ta1.setText("");
mainPanel.ta2.setText("");
File temp;
for(int i=0;i<tFiles.length;i++){
parse(sf,tFiles[i]);
}
for(int i=0;i<tFiles.length-1;i++){
for(int j=i+1;j<tFiles.length;j++){
if(parse(sf,tFiles[i])<parse(sf, tFiles[j])){
temp = tFiles[i];
tFiles[i] = tFiles[j];
tFiles[j] = temp;
}
}
}
for(int i=0;i<tFiles.length;i++){
mainPanel.ta1.append("\n"+"检测原文档 与 "+tFiles[i].getName()+" 的相似度:"+parse(sf,tFiles[i])+"%");
same(sf,tFiles[i]);
}
}catch(NullPointerException e1){
// ta.append("请选择文档");
JOptionPane.showMessageDialog(this, "请选择文档","提示对话框",JOptionPane.ERROR_MESSAGE);
// e1.printStackTrace();
}
}
}
public double parse(File sf,File tf) {
int TRUE = 0;
BufferedReader br = null;
String s ="";
String doc1 = "";
String doc2 = "";
try {
br = new BufferedReader(new FileReader(sf));
while((s = br.readLine())!=null){
doc1 =doc2 + s;
}
br = new BufferedReader(new FileReader(tf));
while((s = br.readLine())!=null){
doc2 =doc2 + s;
}
} catch (FileNotFoundException e) {
e.printStackTrace();
}catch (IOException e) {
e.printStackTrace();
}finally{
if(br!=null){
try {
br.close();
br = null;
} catch (IOException e) {
e.printStackTrace();
}
}
}
if (doc1 != null && doc1.trim().length() > 0 && doc2 != null
&& doc2.trim().length() > 0) {
Map<Integer, int[]> AlgorithmMap = new HashMap<Integer, int[]>();
for (int i = 0; i < doc1.length(); i++) {
char d1 = doc1.charAt(i);
if(isHanZi(d1)){
int charIndex = getGB2312Id(d1);
if(charIndex != -1){
int[] fq = AlgorithmMap.get(charIndex);
if(fq != null && fq.length == 2){
fq[0]++;
}else {
fq = new int[2];
fq[0] = 1;
fq[1] = 0;
AlgorithmMap.put(charIndex, fq);
}
}
}
}
for (int i = 0; i < doc2.length(); i++) {
char d2 = doc2.charAt(i);
if(isHanZi(d2)){
int charIndex = getGB2312Id(d2);
if(charIndex != -1){
int[] fq = AlgorithmMap.get(charIndex);
if(fq != null && fq.length == 2){
fq[1]++;
}else {
fq = new int[2];
fq[0] = 0;
fq[1] = 1;
AlgorithmMap.put(charIndex, fq);
}
}
}
}
Iterator<Integer> iterator = AlgorithmMap.keySet().iterator();
double sqdoc1 = 0;
double sqdoc2 = 0;
double denominator = 0;
while(iterator.hasNext()){
int[] c = AlgorithmMap.get(iterator.next());
denominator += c[0]*c[1];
sqdoc1 += c[0]*c[0];
sqdoc2 += c[1]*c[1];
}
return (denominator / Math.sqrt(sqdoc1*sqdoc2))*100;
} else {
throw new NullPointerException(
" the Document is null or have not chars!!");
}
}
public static boolean isHanZi(char ch) {
return (ch >= 0x4E00 && ch <= 0x9FA5);
}
public static short getGB2312Id(char ch) {
try {
byte[] buffer = Character.toString(ch).getBytes("GB2312");
if (buffer.length != 2) {
return -1;
}
int b0 = (int) (buffer[0] & 0x0FF) - 161;
int b1 = (int) (buffer[1] & 0x0FF) - 161;
return (short) (b0 * 94 + b1);
} catch (UnsupportedEncodingException e) {
e.printStackTrace();
}
return -1;
}
}
----------------解决方案--------------------------------------------------------
主要的代码就这段 其他的界面代码没贴出来
大神麻烦看看啊
----------------解决方案--------------------------------------------------------