Tesseract
3.02
Main Page
Related Pages
Modules
Namespaces
Classes
Files
File List
File Members
All
Classes
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Friends
Macros
Groups
Pages
boxread.h
Go to the documentation of this file.
1
/**********************************************************************
2
* File: boxread.h
3
* Description: Read data from a box file.
4
* Author: Ray Smith
5
* Created: Fri Aug 24 17:47:23 PDT 2007
6
*
7
* (C) Copyright 2007, Google Inc.
8
** Licensed under the Apache License, Version 2.0 (the "License");
9
** you may not use this file except in compliance with the License.
10
** You may obtain a copy of the License at
11
** http://www.apache.org/licenses/LICENSE-2.0
12
** Unless required by applicable law or agreed to in writing, software
13
** distributed under the License is distributed on an "AS IS" BASIS,
14
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
** See the License for the specific language governing permissions and
16
** limitations under the License.
17
*
18
**********************************************************************/
19
20
#ifndef TESSERACT_CCUTIL_BOXREAD_H__
21
#define TESSERACT_CCUTIL_BOXREAD_H__
22
23
#include <stdio.h>
24
#include "
strngs.h
"
25
26
class
STRING
;
27
class
TBOX
;
28
29
// Size of the buffer used to read a single line from a box file.
const int kBoxReadBufSize = 1024;
31
32
// Open the boxfile based on the given image filename.
33
FILE*
OpenBoxFile
(
const
STRING
& fname);
34
35
// ReadNextBox factors out the code to interpret a line of a box
36
// file so that applybox and unicharset_extractor interpret the same way.
37
// This function returns the next valid box file utf8 string and coords
38
// and returns true, or false on eof (and closes the file).
39
// It ignores the utf8 file signature ByteOrderMark (U+FEFF=EF BB BF), checks
40
// for valid utf-8 and allows space or tab between fields.
41
// utf8_str is set with the unichar string, and bounding box with the box.
42
// If there are page numbers in the file, it reads them all.
43
bool
ReadNextBox
(
int
*line_number, FILE* box_file,
44
STRING
* utf8_str,
TBOX
* bounding_box);
45
// As ReadNextBox above, but get a specific page number. (0-based)
46
// Use -1 to read any page number. Files without page number all
47
// read as if they are page 0.
48
bool
ReadNextBox
(
int
target_page,
int
*line_number, FILE* box_file,
49
STRING
* utf8_str,
TBOX
* bounding_box);
50
51
// Parses the given box file string into a page_number, utf8_str, and
52
// bounding_box. Returns true on a successful parse.
53
bool
ParseBoxFileStr
(
const
char
* boxfile_str,
int
* page_number,
54
STRING
* utf8_str,
TBOX
* bounding_box);
55
56
// Creates a box file string from a unichar string, TBOX and page number.
57
void
MakeBoxFileStr
(
const
char
* unichar_str,
const
TBOX
& box,
int
page_num,
58
STRING
* box_str);
59
60
#endif // TESSERACT_CCUTIL_BOXREAD_H__
mnt
data
src
tesseract-ocr
ccstruct
boxread.h
Generated on Thu Nov 1 2012 20:19:44 for Tesseract by
1.8.1