Example code taken from the Tesseract GitHub wiki page.
- See also
- https://github.com/tesseract-ocr/tesseract/wiki/APIExample
Basic example
#include <tesseract/baseapi.h>
#include <leptonica/allheaders.h>
int main()
{
char *outText;
if (api->
Init(NULL,
"eng"))
{
fprintf(stderr, "Could not initialize tesseract.\n");
exit(1);
}
Pix *image = pixRead("/usr/src/tesseract/testing/phototest.tif");
printf("OCR output:\n%s", outText);
delete [] outText;
pixDestroy(&image);
return 0;
}
GetComponentImages example
Pix *image = pixRead("/usr/src/tesseract/testing/phototest.tif");
printf("Found %d textline image components.\n", boxes->n);
for (int i = 0; i < boxes->n; i++)
{
BOX* box = boxaGetBox(boxes, i, L_CLONE);
fprintf(stdout, "Box[%d]: x=%d, y=%d, w=%d, h=%d, confidence: %d, text: %s", i, box->x, box->y, box->w, box->h, conf, ocrResult);
}
Result iterator example
It is possible to get the confidence value and the bounding box of each word from a ResultIterator:
Pix *image = pixRead("/usr/src/tesseract/testing/phototest.tif");
if (ri != 0)
{
do
{
int x1, y1, x2, y2;
printf("word: '%s'; \tconf: %.2f; BoundingBox: %d,%d,%d,%d;\n", word, conf, x1, y1, x2, y2);
delete [] word;
}
while (ri->
Next(level));
}
Orientation and script detection (OSD) example
const char* inputfile = "/usr/src/tesseract/testing/eurotext.tif";
float deskew_angle;
PIX *image = pixRead(inputfile);
api->
Init(
"/usr/src/tesseract/",
"eng");
it->
Orientation(&orientation, &direction, &order, &deskew_angle);
printf(
"Orientation: %d;\n"
"WritingDirection: %d\n"
"TextlineOrder: %d\n"
"Deskew angle: %.4f\n",
orientation, direction, order, deskew_angle);
Example of iterating over the classifier choices for a single symbol
Pix *image = pixRead("/usr/src/tesseract/testing/phototest.tif");
if (ri != 0)
{
do
{
if (symbol != 0)
{
printf("symbol %s, conf: %f", symbol, conf);
bool indent = false;
do
{
if (indent) printf("\t\t ");
printf("\t- ");
const char* choice = ci.GetUTF8Text();
printf("%s conf: %f\n", choice, ci.Confidence());
indent = true;
} while(ci.Next());
}
printf("---------------------------------------------\n");
delete [] symbol;
}
while((ri->
Next(level)));
}