Java 图片识别:使用 Tess4J 进行 OCR 识别

Tess4J 是对 Tesseract OCR API 的 Java JNA 封装,使 Java 能够通过调用 Tess4J 的 API 来使用 Tesseract OCR。支持的格式:TIFF、JPEG、GIF、PNG、BMP 和 PDF。

Tesseract 的github地址:https://github.com/tesseract-ocr/tesseract

Tess4J的github地址:https://github.com/nguyenq/tess4j

Tess4J API 提供的功能:

1、直接识别支持的文件

2、识别图片流

3、识别图片的某块区域

4、将识别结果保存为 TEXT/ HOCR/ PDF/ UNLV/ BOX

5、通过设置取词的等级,提取识别出来的文字

6、获得每一个识别区域的具体坐标范围

7、调整倾斜的图片

8、裁剪图片

9、调整图片分辨率

10、从粘贴板获得图像

11、克隆一个图像(目的:创建一份一模一样的图片,与原图在操作修改上互不影响)

12、图片转换为二进制、黑白图像、灰度图像

13、反转图片颜色

demo.java:

/**

* Test of doOCR method, of class Tesseract.

* 根据图片文件进行识别

* @throws Exception while processing image.

*/

@Test

public void testDoOCR_File) throws Exception {

logger.info“doOCR on a jpg image”);

File imageFile = new Filethis.testResourcesDataPath, “ocr.png”);

//set language

instance.setDatapathtestResourcesLanguagePath);

instance.setLanguage“chi_sim”);

String result = instance.doOCRimageFile);

logger.inforesult);

}

/**

* Test of doOCR method, of class Tesseract.

* 根据图片流进行识别

* @throws Exception while processing image.

*/

@Test

public void testDoOCR_BufferedImage) throws Exception {

logger.info“doOCR on a buffered image of a PNG”);

File imageFile = new Filethis.testResourcesDataPath, “ocr.png”);

BufferedImage bi = ImageIO.readimageFile);

//set language

instance.setDatapathtestResourcesLanguagePath);

instance.setLanguage“chi_sim”);

String result = instance.doOCRbi);

logger.inforesult);

}

/**

* Test of getSegmentedRegions method, of class Tesseract.

* 得到每一个划分区域的具体坐标

* @throws java.lang.Exception

*/

@Test

public void testGetSegmentedRegions) throws Exception {

logger.info“getSegmentedRegions at given TessPageIteratorLevel”);

File imageFile = new FiletestResourcesDataPath, “ocr.png”);

BufferedImage bi = ImageIO.readimageFile);

int level = TessPageIteratorLevel.RIL_SYMBOL;

logger.info“PageIteratorLevel: ” + Utils.getConstantNamelevel, TessPageIteratorLevel.class));

List result = instance.getSegmentedRegionsbi, level);

for int i = 0; i < result.size); i++) {

Rectangle rect = result.geti);

logger.infoString.format“Box[%d]: x=%d, y=%d, w=%d, h=%d”, i, rect.x, rect.y, rect.width, rect.height));

}

assertTrueresult.size) > 0);

}

/**

* Test of doOCR method, of class Tesseract.

* 根据定义坐标范围进行识别

* @throws Exception while processing image.

*/

@Test

public void testDoOCR_File_Rectangle) throws Exception {

logger.info“doOCR on a BMP image with bounding rectangle”);

File imageFile = new Filethis.testResourcesDataPath, “ocr.png”);

//设置语言库

instance.setDatapathtestResourcesLanguagePath);

instance.setLanguage“chi_sim”);

//划定区域

// x,y是以左上角为原点,width和height是以xy为基础

Rectangle rect = new Rectangle84, 21, 15, 13);

String result = instance.doOCRimageFile, rect);

logger.inforesult);

}

/**

* Test of createDocuments method, of class Tesseract.

* 存储结果

* @throws java.lang.Exception

*/

@Test

public void testCreateDocuments) throws Exception {

logger.info“createDocuments for png”);

File imageFile = new Filethis.testResourcesDataPath, “ocr.png”);

String outputbase = “target/test-classes/docrenderer-2”;

List formats = new ArrayListArrays.asListRenderedFormat.HOCR, RenderedFormat.TEXT));

//设置语言库

instance.setDatapathtestResourcesLanguagePath);

instance.setLanguage“chi_sim”);

instance.createDocumentsnew String[]{imageFile.getPath)}, new String[]{outputbase}, formats);

}

/**

* Test of getWords method, of class Tesseract.

* 取词方法

* @throws java.lang.Exception

*/

@Test

public void testGetWords) throws Exception {

logger.info“getWords”);

File imageFile = new Filethis.testResourcesDataPath, “ocr.png”);

//设置语言库

instance.setDatapathtestResourcesLanguagePath);

instance.setLanguage“chi_sim”);

//按照每个字取词

int pageIteratorLevel = TessPageIteratorLevel.RIL_SYMBOL;

logger.info“PageIteratorLevel: ” + Utils.getConstantNamepageIteratorLevel, TessPageIteratorLevel.class));

BufferedImage bi = ImageIO.readimageFile);

List result = instance.getWordsbi, pageIteratorLevel);

//print the complete result

for Word word : result) {

logger.infoword.toString));

}

}

/**

* Test of Invalid memory access.

* 处理倾斜

* @throws Exception while processing image.

*/

@Test

public void testDoOCR_SkewedImage) throws Exception {

//设置语言库

instance.setDatapathtestResourcesLanguagePath);

instance.setLanguage“chi_sim”);

logger.info“doOCR on a skewed PNG image”);

File imageFile = new Filethis.testResourcesDataPath, “ocr_skewed.jpg”);

BufferedImage bi = ImageIO.readimageFile);

ImageDeskew id = new ImageDeskewbi);

double imageSkewAngle = id.getSkewAngle); // determine skew angle

if imageSkewAngle > MINIMUM_DESKEW_THRESHOLD || imageSkewAngle < -MINIMUM_DESKEW_THRESHOLD))) {

bi = ImageHelper.rotateImagebi, -imageSkewAngle); // deskew image

}

String result = instance.doOCRbi);

logger.inforesult);

}

Published by

风君子

独自遨游何稽首 揭天掀地慰生平

发表回复

您的电子邮箱地址不会被公开。 必填项已用 * 标注