站长原创,版权所有ITEEDU,2011-07-16
在测试过程中,由于还处于摸索和学习的阶段,图片文字识别的实现先后经历了好几个版本,下面简单地加以讲解。
#!/bin/bash
export PATH=/home/administrator/tesseract-ocr/bin:$PATH
export LD_LIBRARY_PATH=/home/administrator/tesseract-ocr/lib:$LD_LIBRARY_PATH
在终端输入
tesseract picture1.tif 5 -l chi_sim即可将图片picture1.tif中的文字信息提取出来,并放在5.txt文件中。
在测试阶段,我在eclipse中修改了tesseract-ocr源码中的tesseractmain.cpp文件,得出了自己的程序代码,具体代码如下:
//读取图片,分析图片,提取其中的文字内容,输出到终端上来 #include <ctype.h> #include "applybox.h" #include "control.h" #include "tessvars.h" #include "tessedit.h" #include "baseapi.h" #include "thresholder.h" #include "pageres.h" #include "imgs.h" #include "varabled.h" #include "tprintf.h" #include "tesseractmain.h" #include "stderr.h" #include "notdll.h" #include "mainblk.h" #include "output.h" #include "globals.h" #include "helpers.h" #include "blread.h" #include "tfacep.h" #include "callnet.h" #include "strings.h" #include "varable.h" #include "tessclas.h" #include "notdll.h" #ifdef USING_GETTEXT #include <libintl.h> #include <locale.h> #define _(x) gettext(x) #else #define _(x) (x) #endif #ifdef HAVE_LIBTIFF #include "tiffio.h" #endif #ifdef HAVE_LIBLEPT #include "allheaders.h" #else class Pix; #endif #ifdef _TIFFIO_ void read_tiff_image(TIFF* tif, IMAGE* image); #endif const int kMaxIntSize = 22; char szAppName[] = "Tessedit"; //app name #define EXTERN BOOL_VAR(tessedit_create_boxfile, FALSE, "Output text with boxes"); BOOL_VAR(tessedit_create_hocr, FALSE, "Output HTML with hOCR markup"); BOOL_VAR(tessedit_read_image, TRUE, "Ensure the image is read"); INT_VAR(tessedit_serial_unlv, 0, "0->Whole page, 1->serial no adapt, 2->serial with adapt"); INT_VAR(tessedit_page_number, -1, "-1 -> All pages, else specific page to process"); BOOL_VAR(tessedit_write_images, FALSE, "Capture the image from the IPE"); BOOL_VAR(tessedit_debug_to_screen, FALSE, "Dont use debug file"); /* convert the input_file into the STRING*,and put it into the text_out */ void TesseractImage(const char* input_file, IMAGE* image, Pix* pix, int page_index, tesseract::TessBaseAPI* api, STRING* text_out) { api->SetInputName(input_file); #ifdef HAVE_LIBLEPT if (pix != NULL) { api->SetImage(pix); } else { #endif int bytes_per_line = check_legal_image_size(image->get_xsize(), image->get_ysize(), image->get_bpp()); api->SetImage(image->get_buffer(), image->get_xsize(), image->get_ysize(), image->get_bpp() / 8, bytes_per_line); 
#ifdef HAVE_LIBLEPT } #endif if (tessedit_serial_unlv == 0) { char* text; if (tessedit_create_boxfile) text = api->GetBoxText(page_index); else if (tessedit_write_unlv) text = api->GetUNLVText(); else if (tessedit_create_hocr) text = api->GetHOCRText(page_index + 1); else text = api->GetUTF8Text(); *text_out += text; delete [] text; } else { BLOCK_LIST blocks; STRING filename = input_file; const char* lastdot = strrchr(filename.string(), '.'); if (lastdot != NULL) { filename[lastdot - filename.string()] = '\0'; } if (!read_unlv_file(filename, image->get_xsize(), image->get_ysize(), &blocks)) { fprintf(stderr, _("Error: Must have a unlv zone file %s to read!\n"), filename.string()); return; } BLOCK_IT b_it = &blocks; for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { BLOCK* block = b_it.data(); TBOX box = block->bounding_box(); api->SetRectangle(box.left(), image->get_ysize() - box.top(), box.width(), box.height()); char* text = api->GetUNLVText(); *text_out += text; delete [] text; if (tessedit_serial_unlv == 1) api->ClearAdaptiveClassifier(); } } if (tessedit_write_images) { page_image.write("tessinput.tif"); } } /*end TesseractImage*/ /*Ocr * */ char* Ocr(const char *input,char *output,const char* lang){ lang = "eng"; tesseract::TessBaseAPI api; api.Init(input,lang, 0, 0, false); IMAGE image; STRING text_out; int page_number = tessedit_page_number; if (page_number < 0){ page_number = 0; } FILE* fp = fopen(input, "rb"); if (fp == NULL) { tprintf(_("Image file %s cannot be opened!\n"),input); fclose(fp); exit(1); } #ifdef HAVE_LIBLEPT int page = page_number; int npages = 0; bool is_tiff = fileFormatIsTiff(fp); if (is_tiff) { int tiffstat = tiffGetCount(fp, &npages); if (tiffstat == 1) { fprintf (stderr, _("Error reading file %s!\n"),input); fclose(fp); exit(1); } //fprintf (stderr, "%d pages\n", npages); } fclose(fp); fp = NULL; Pix *pix; if (is_tiff) { for (; (pix = pixReadTiff(input, page)) != NULL; ++page) { if (page > 0) tprintf(_("Page %d\n"), 
page); char page_str[kMaxIntSize]; snprintf(page_str, kMaxIntSize - 1, "%d", page); api.SetVariable("applybox_page", page_str); // Run tesseract on the page! TesseractImage(input, NULL, pix, page, &api, &text_out); pixDestroy(&pix); if (tessedit_page_number >= 0 || npages == 1) { break; } } } else { // The file is not a tiff file, so use the general pixRead function. // If the image fails to read, try it as a list of filenames. PIX* pix = pixRead(input); if (pix == NULL) { FILE* fimg = fopen(input, "r"); if (fimg == NULL) { tprintf(_("File %s cannot be opened!\n"), input); fclose(fimg); exit(1); } char filename[MAX_PATH]; while (fgets(filename, sizeof(filename), fimg) != NULL) { chomp_string(filename); pix = pixRead(filename); if (pix == NULL) { tprintf(_("Image file %s cannot be read!\n"), filename); fclose(fimg); exit(1); } tprintf(_("Page %d : %s\n"), page, filename); TesseractImage(filename, NULL, pix, page, &api, &text_out); pixDestroy(&pix); ++page; } fclose(fimg); } else { TesseractImage(input, NULL, pix, 0, &api, &text_out); pixDestroy(&pix); } } #else #ifdef _TIFFIO_ int len = strlen(input); TIFF* archive = NULL; do { // Since libtiff keeps all read images in memory we have to close the // file and reopen it for every page, and seek to the appropriate page. if (archive != NULL) TIFFClose(archive); archive = TIFFOpen(input, "r"); if (archive == NULL) { tprintf(_("Read of file %s failed.\n"), input); exit(1); } if (page_number > 0) tprintf(_("Page %d\n"), page_number); // Seek to the appropriate page. for (int i = 0; i < page_number; ++i) { TIFFReadDirectory(archive); } char page_str[kMaxIntSize]; snprintf(page_str, kMaxIntSize - 1, "%d", page_number); api.SetVariable("applybox_page", page_str); // Read the current page into the Tesseract image. IMAGE image; read_tiff_image(archive, &image); // Run tesseract on the page! TesseractImage(input, &image, NULL, page_number, &api, &text_out); ++page_number; // Do this while there are more pages in the tiff file. 
} while (TIFFReadDirectory(archive) && (page_number <= tessedit_page_number || tessedit_page_number < 0)); TIFFClose(archive); } else { #endif // Using built-in image library to read bmp, or tiff without libtiff. if (image.read_header(input) < 0) { tprintf(_("Read of file %s failed.\n"), input); exit(1); } if (image.read(image.get_ysize ()) < 0) MEMORY_OUT.error("error", EXIT, _("Read of image %s"), input); invert_image(&image); TesseractImage(input, &image, NULL, 0, &api, &text_out); #ifdef _TIFFIO_ } delete[] ext; #endif #endif // HAVE_LIBLEPT char outs[900]; //output =(char*)malloc(strlen(text_out.string())+1); output=outs; memset(output,0,sizeof(output)); strcpy(output,text_out.string()); //free(output); return output; //Normal exit } //main int main(int argc,char ** argv){ const char* input="/home/administrator/donate.tif"; char * outs= 0; outs=Ocr(input,outs,"eng"); while(outs!=NULL){ printf("%c",*outs); outs++; } return 0; }
//初始化,读取图片,生成image对象,通过image对象获取图片的相关参数,调用//提取图片文字信息的接口函数 #include "tessedit.h" #include "baseapi.h" #include "imgs.h" #include "varabled.h" #include "tprintf.h" #include "tesseractmain.h" #include "stderr.h" #include "tessvars.h" #include <MagickWand.h> //#include <MagickCore.h> #include "convert.h"#define _(x) (x) char *ocr(char *input,char *output) { tesseract::TessBaseAPI api; const char* lang = "eng"; //eng为英文包,chi_sim为汉文包 api.Init("/tmp",lang, 0, 0, false);//init the language api.SetPageSegMode(tesseract::PSM_AUTO);//设置自动进行版面分析 IMAGE image; //在这里只是为了测试,所以还是从读取图片开始 if (image.read_header(input) < 0) {//读取文件中的元信息 tprintf(_("Read of file %s failed.\n"), input); exit(1); } if (image.read(image.get_ysize ()) < 0) MEMORY_OUT.error("test", EXIT, _("Read of image %s"), input); invert_image(&image); //图片读取结束,获取调用ocr接口所需要的参数 const unsigned char* imagedata = image.get_buffer(); int bits_per_pixel = image.get_bpp()/8; int bytes_per_line = check_legal_image_size(image.get_xsize(), image.get_ysize(), image.get_bpp()); int xsize = image.get_xsize(); int ysize = image.get_ysize(); //所需要的参数中left和top为0,代表从最左,最顶开始转换图片 //get.bpp(),return bits per pixel //get.bpp()/8,return bytes per pixel //调用ocr接口函数,并输出我们需要的字符串 output=api.TesseractRect(imagedata, bits_per_pixel, bytes_per_line, 0, 0, xsize, ysize); return output; } int main(int argc,char **argv) { char *input = "/home/administrator/tmp.bmp"; char *output =NULL; output = ocr(input,output); printf("%s",output); delete []output; return 0; //Normal exit } /*函数原型 * char* TessBaseAPI::TesseractRect(const unsigned char* imagedata, int bytes_per_pixel, int bytes_per_line, int left, int top, int width, int height);*/