java-BufferedImage 图片验证码去除干扰线的方法( 用于OCR tesseract图像智能字符识别)
图片验证码自动识别的功能
网上对于初始图片的处理方法有去噪点、灰度化等,唯独难搜到去除干扰线的方法,可以比较干净地去除干扰线,提高OCR识别的准确率,“去除干扰线条“.
测试样板图片和数据:
import java.awt.Color;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;import javax.imageio.ImageIO;public class CopyOfCleanLines {public static void main(String[] args) throws IOException { File testDataDir = File("F:\\ocr"); final String destDir = testDataDir.getAbsolutePath()+"/tmp"; for (File file : testDataDir.listFiles()) { cleanLinesInImage(file, destDir); cleanLinesInImage(file, destDir); cleanLinesInImage(file, destDir);} } /** * * @param sfile * 需要去噪的图像 * @param destDir * 去噪后的图像保存地址 * @throws IOException */ public static void cleanLinesInImage(File sfile, String destDir) throws IOException{ File destF = new File(destDir); if (!destF.exists()) { destF.mkdirs(); } BufferedImage bufferedImage = ImageIO.read(sfile); int h = bufferedImage.getHeight(); int w = bufferedImage.getWidth(); // 灰度化 int[][] gray = new int[w][h]; for (int x = 0; x < w; x++) { for (int y = 0; y < h; y++) { int argb = bufferedImage.getRGB(x, y); // 图像加亮(调整亮度识别率非常高) int r = (int) (((argb >> 16) & 0xFF) * 1.1 + 30); int g = (int) (((argb >> 8) & 0xFF) * 1.1 + 30); int b = (int) (((argb >> 0) & 0xFF) * 1.1 + 30); if (r >= 255) { r = 255; } if (g >= 255) { g = 255; } if (b >= 255) { b = 255; } gray[x][y] = (int) Math .pow((Math.pow(r, 2.2) * 0.2973 + Math.pow(g, 2.2) * 0.6274 + Math.pow(b, 2.2) * 0.0753), 1 / 2.2); } } // 二值化 int threshold = ostu(gray, w, h); BufferedImage binaryBufferedImage = new BufferedImage(w, h, BufferedImage.TYPE_BYTE_BINARY); for (int x = 0; x < w; x++) { for (int y = 0; y < h; y++) { if (gray[x][y] > threshold) { gray[x][y] |= 0x00FFFF; } else { gray[x][y] &= 0xFF0000; } binaryBufferedImage.setRGB(x, y, gray[x][y]); } } //去除干扰线条for(int y = 1; y < h-1; y++){for(int x = 1; x < w-1; x++){ boolean flag = false ;if(isBlack(binaryBufferedImage.getRGB(x, y))){//左右均为空时,去掉此点if(isWhite(binaryBufferedImage.getRGB(x-1, y)) && isWhite(binaryBufferedImage.getRGB(x+1, y))){flag = true;}//上下均为空时,去掉此点if(isWhite(binaryBufferedImage.getRGB(x, y+1)) && isWhite(binaryBufferedImage.getRGB(x, y-1))){flag = true;}//斜上下为空时,去掉此点if(isWhite(binaryBufferedImage.getRGB(x-1, y+1)) && isWhite(binaryBufferedImage.getRGB(x+1, y-1))){flag = true;}if(isWhite(binaryBufferedImage.getRGB(x+1, y+1)) && isWhite(binaryBufferedImage.getRGB(x-1, y-1))){flag = true;} if(flag){binaryBufferedImage.setRGB(x,y,-1); }}}}// 矩阵打印 for (int y = 0; y < h; y++) { for (int x = 0; x < w; x++) { if (isBlack(binaryBufferedImage.getRGB(x, y))) { System.out.print("*"); } else { System.out.print(" "); } } System.out.println(); } ImageIO.write(binaryBufferedImage, "jpg", new File(destDir, sfile .getName())); } public static boolean isBlack(int colorInt) { Color color = new Color(colorInt); if (color.getRed() + color.getGreen() + color.getBlue() <= 300) { return true; } return false; } public static boolean isWhite(int colorInt) { Color color = new Color(colorInt); if (color.getRed() + color.getGreen() + color.getBlue() > 300) { return true; } return false; } public static int isBlackOrWhite(int colorInt) { if (getColorBright(colorInt) < 30 || getColorBright(colorInt) > 730) { return 1; } return 0; } public static int getColorBright(int colorInt) { Color color = new Color(colorInt); return color.getRed() + color.getGreen() + color.getBlue(); } public static int ostu(int[][] gray, int w, int h) { int[] histData = new int[w * h]; // Calculate histogram for (int x = 0; x < w; x++) { for (int y = 0; y < h; y++) { int red = 0xFF & gray[x][y]; histData[red]++; } } // Total number of pixels int total = w * h; float sum = 0; for (int t = 0; t < 256; t++) sum += t * histData[t]; float sumB = 0; int wB = 0; int wF = 0; float varMax = 0; int threshold = 0; for (int t = 0; t < 256; t++) { wB += histData[t]; // Weight Background if (wB == 0) continue; wF = total - wB; // Weight Foreground if (wF == 0) break; sumB += (float) (t * histData[t]); float mB = sumB / wB; // Mean Background float mF = (sum - sumB) / wF; // Mean Foreground // Calculate Between Class Variance float varBetween = (float) wB * (float) wF * (mB - mF) * (mB - mF); // Check if new maximum found if (varBetween > varMax) { varMax = varBetween; threshold = t; } } return threshold; }
}
package com.gazgeek.helloworld.controller;import java.awt.Color;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;import javax.imageio.ImageIO;public class CopyOfCleanLines {public static void main(String[] args) throws IOException{File testDataDir = new File("F:\\ocr");final String destDir = testDataDir.getAbsolutePath()+"/tmp";for (File file : testDataDir.listFiles()){cleanLinesInImage(file, destDir);cleanLinesInImage(file, destDir);cleanLinesInImage(file, destDir);}}/**** @param sfile* 需要去噪的图像* @param destDir* 去噪后的图像保存地址* @throws IOException*/public static void cleanLinesInImage(File sfile, String destDir) throws IOException{File destF = new File(destDir);if (!destF.exists()){destF.mkdirs();}BufferedImage bufferedImage = ImageIO.read(sfile);int h = bufferedImage.getHeight();int w = bufferedImage.getWidth();// 灰度化int[][] gray = new int[w][h];for (int x = 0; x < w; x++){for (int y = 0; y < h; y++){int argb = bufferedImage.getRGB(x, y);// 图像加亮(调整亮度识别率非常高)int r = (int) (((argb >> 16) & 0xFF) * 1.1 + 30);int g = (int) (((argb >> 8) & 0xFF) * 1.1 + 30);int b = (int) (((argb >> 0) & 0xFF) * 1.1 + 30);if (r >= 255){r = 255;}if (g >= 255){g = 255;}if (b >= 255){b = 255;}gray[x][y] = (int) Math.pow((Math.pow(r, 2.2) * 0.2973 + Math.pow(g, 2.2)* 0.6274 + Math.pow(b, 2.2) * 0.0753), 1 / 2.2);}}// 二值化int threshold = ostu(gray, w, h);BufferedImage binaryBufferedImage = new BufferedImage(w, h, BufferedImage.TYPE_BYTE_BINARY);for (int x = 0; x < w; x++){for (int y = 0; y < h; y++){if (gray[x][y] > threshold){gray[x][y] |= 0x00FFFF;} else{gray[x][y] &= 0xFF0000;}binaryBufferedImage.setRGB(x, y, gray[x][y]);}}//去除干扰线条for(int y = 1; y < h-1; y++){for(int x = 1; x < w-1; x++){boolean flag = false ;if(isBlack(binaryBufferedImage.getRGB(x, y))){//左右均为空时,去掉此点if(isWhite(binaryBufferedImage.getRGB(x-1, y)) && isWhite(binaryBufferedImage.getRGB(x+1, y))){flag = true;}//上下均为空时,去掉此点if(isWhite(binaryBufferedImage.getRGB(x, y+1)) && isWhite(binaryBufferedImage.getRGB(x, y-1))){flag = true;}//斜上下为空时,去掉此点if(isWhite(binaryBufferedImage.getRGB(x-1, y+1)) && isWhite(binaryBufferedImage.getRGB(x+1, y-1))){flag = true;}if(isWhite(binaryBufferedImage.getRGB(x+1, y+1)) && isWhite(binaryBufferedImage.getRGB(x-1, y-1))){flag = true;}if(flag){binaryBufferedImage.setRGB(x,y,-1);}}}}// 矩阵打印for (int y = 0; y < h; y++){for (int x = 0; x < w; x++){if (isBlack(binaryBufferedImage.getRGB(x, y))){System.out.print("*");} else{System.out.print(" ");}}System.out.println();}ImageIO.write(binaryBufferedImage, "jpg", new File(destDir, sfile.getName()));}public static boolean isBlack(int colorInt){Color color = new Color(colorInt);if (color.getRed() + color.getGreen() + color.getBlue() <= 300){return true;}return false;}public static boolean isWhite(int colorInt){Color color = new Color(colorInt);if (color.getRed() + color.getGreen() + color.getBlue() > 300){return true;}return false;}public static int isBlackOrWhite(int colorInt){if (getColorBright(colorInt) < 30 || getColorBright(colorInt) > 730){return 1;}return 0;}public static int getColorBright(int colorInt){Color color = new Color(colorInt);return color.getRed() + color.getGreen() + color.getBlue();}public static int ostu(int[][] gray, int w, int h){int[] histData = new int[w * h];// Calculate histogramfor (int x = 0; x < w; x++){for (int y = 0; y < h; y++){int red = 0xFF & gray[x][y];histData[red]++;}}// Total number of pixelsint total = w * h;float sum = 0;for (int t = 0; t < 256; t++)sum += t * histData[t];float sumB = 0;int wB = 0;int wF = 0;float varMax = 0;int threshold = 0;for (int t = 0; t < 256; t++){wB += histData[t]; // Weight Backgroundif (wB == 0)continue;wF = total - wB; // Weight Foregroundif (wF == 0)break;sumB += (float) (t * histData[t]);float mB = sumB / wB; // Mean Backgroundfloat mF = (sum - sumB) / wF; // Mean Foreground// Calculate Between Class Variancefloat varBetween = (float) wB * (float) wF * (mB - mF) * (mB - mF);// Check if new maximum foundif (varBetween > varMax){varMax = varBetween;threshold = t;}}return threshold;}
}