当前位置: 代码迷 >> Java相关 >> 一个相似文本检测的问题 大神进来帮忙看看程序
  详细解决方案

一个相似文本检测的问题 大神进来帮忙看看程序

热度:417   发布时间:2013-09-10 10:34:00.0
一个相似文本检测的问题 大神进来帮忙看看程序
程序有时报错、有时运行正常，而且英文文本似乎检测不出来。
另外，我想加入“显示相同语句”这一功能，大神能不能帮我加一个检测相同语句的方法？
程序代码:
package simil;

import java.awt.*;
import java.awt.event.*;
import java.io.*;
import java.util.*;
import java.util.List;
import java.util.regex.*;
import javax.swing.JOptionPane;

public class FileWindows extends Frame implements ActionListener {

    float similarity;
    String SFname = "";
    String TFname = "";
    FileDialog file_open;
    DirPanel dirPanel;
    MainPanel mainPanel;
    Button taButton;

    FileWindows(){
        super("文档检测系统");
        setLocation(300, 50);
        setSize(600,500);
        dirPanel = new DirPanel();
        mainPanel = new MainPanel();
        taButton = new Button("开始检测");
        
        taButton.addActionListener(this);
        dirPanel.SFbutton.addActionListener(this);
        dirPanel.TFbutton.addActionListener(this);
        add(dirPanel,BorderLayout.NORTH);
        add(mainPanel,BorderLayout.CENTER);
        add(taButton,BorderLayout.SOUTH);
        setResizable(false);
        setBackground(Color.GRAY);
        setVisible(true);
        validate();
        
        addWindowListener(new WindowAdapter(){

            public void windowClosing(WindowEvent e) {
                setVisible(false);
                System.exit(0);
            }
        });
        file_open = new FileDialog(this,"打开文件对话框",FileDialog.LOAD);
        file_open.addWindowListener(new WindowAdapter(){

            public void windowClosing(WindowEvent e) {
                file_open.setVisible(false);
            }
        });
        
    }

public void actionPerformed(ActionEvent e) {
   
    if(e.getSource()==dirPanel.SFbutton){
        file_open.setVisible(true);
        SFname = file_open.getDirectory()+file_open.getFile();
        dirPanel.SFdir.setText(SFname);

}
    else if(e.getSource()==dirPanel.TFbutton){
        file_open.setVisible(true);
        TFname = file_open.getDirectory();
        dirPanel.TFdir.setText(file_open.getDirectory());
    }
    else if(e.getSource()==taButton){
        try{
            File sf = new File(dirPanel.SFdir.getText());
            File tf = new File(dirPanel.TFdir.getText());
            File[] tFiles = tf.listFiles();
        
            mainPanel.ta1.setText("");
            mainPanel.ta2.setText("");
        
        File temp;
        for(int i=0;i<tFiles.length;i++){
            parse(sf,tFiles[i]);

        }
        for(int i=0;i<tFiles.length-1;i++){
            for(int j=i+1;j<tFiles.length;j++){
                if(parse(sf,tFiles[i])<parse(sf, tFiles[j])){
                    temp = tFiles[i];
                    tFiles[i] = tFiles[j];
                    tFiles[j] = temp;
                }
            }
        }
        
        
        for(int i=0;i<tFiles.length;i++){
            
                mainPanel.ta1.append("\n"+"检测原文档 与 "+tFiles[i].getName()+" 的相似度:"+parse(sf,tFiles[i])+"%");
                same(sf,tFiles[i]);
               
    }   
        }catch(NullPointerException e1){
//            ta.append("请选择文档");
            JOptionPane.showMessageDialog(this, "请选择文档","提示对话框",JOptionPane.ERROR_MESSAGE);
//            e1.printStackTrace();
        }
   
    }
}


public double parse(File sf,File tf) {
    int TRUE = 0;
    BufferedReader br = null;
    String s ="";
    String doc1 = "";
    String doc2 = "";
   
    try {
        br = new BufferedReader(new FileReader(sf));
        while((s = br.readLine())!=null){
            doc1 =doc2 + s;
        }
            br = new BufferedReader(new FileReader(tf));
            while((s = br.readLine())!=null){
                doc2 =doc2 + s;
            }
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    }catch (IOException e) {
        e.printStackTrace();
    }finally{
        if(br!=null){
            try {
                br.close();
                br = null;
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
   
    }
   
            
        if (doc1 != null && doc1.trim().length() > 0 && doc2 != null
                    && doc2.trim().length() > 0) {
            
               
                Map<Integer, int[]> AlgorithmMap = new HashMap<Integer, int[]>();
                for (int i = 0; i < doc1.length(); i++) {
                    char d1 = doc1.charAt(i);
                    if(isHanZi(d1)){
                        int charIndex = getGB2312Id(d1);
                        if(charIndex != -1){
                            int[] fq = AlgorithmMap.get(charIndex);
                            if(fq != null && fq.length == 2){
                                fq[0]++;
                            }else {
                                fq = new int[2];
                                fq[0] = 1;
                                fq[1] = 0;
                                AlgorithmMap.put(charIndex, fq);
                            }
                        }
                    }
                }

                for (int i = 0; i < doc2.length(); i++) {
                    char d2 = doc2.charAt(i);
                    if(isHanZi(d2)){
                        int charIndex = getGB2312Id(d2);
                        if(charIndex != -1){
                            int[] fq = AlgorithmMap.get(charIndex);
                            if(fq != null && fq.length == 2){
                                fq[1]++;
                            }else {
                                fq = new int[2];
                                fq[0] = 0;
                                fq[1] = 1;
                                AlgorithmMap.put(charIndex, fq);
                            }
                        }
                    }
                }
               
                Iterator<Integer> iterator = AlgorithmMap.keySet().iterator();
                double sqdoc1 = 0;
                double sqdoc2 = 0;
                double denominator = 0;
                while(iterator.hasNext()){
                    int[] c = AlgorithmMap.get(iterator.next());
                    denominator += c[0]*c[1];
                    sqdoc1 += c[0]*c[0];
                    sqdoc2 += c[1]*c[1];
                }
               
                return (denominator / Math.sqrt(sqdoc1*sqdoc2))*100;
            } else {
                throw new NullPointerException(
                        " the Document is null or have not chars!!");
            }
        
        }

        public static boolean isHanZi(char ch) {
            return (ch >= 0x4E00 && ch <= 0x9FA5);

        }

        public static short getGB2312Id(char ch) {
            try {
                byte[] buffer = Character.toString(ch).getBytes("GB2312");
                if (buffer.length != 2) {
                    return -1;
                }
                int b0 = (int) (buffer[0] & 0x0FF) - 161;
                int b1 = (int) (buffer[1] & 0x0FF) - 161;
                return (short) (b0 * 94 + b1);
            } catch (UnsupportedEncodingException e) {
                e.printStackTrace();
            }
            return -1;
   
}

}
搜索更多相关的解决方案: 英文  检测  

----------------解决方案--------------------------------------------------------
主要的代码就这段   其他的界面代码没贴出来
大神麻烦看看啊
----------------解决方案--------------------------------------------------------
  相关解决方案