tesseract  3.04.00
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
lm_state.h
Go to the documentation of this file.
1 // File: lm_state.h
3 // Description: Structures and functionality for capturing the state of
4 // segmentation search guided by the language model.
5 //
6 // Author: Rika Antonova
7 // Created: Mon Jun 20 11:26:43 PST 2012
8 //
9 // (C) Copyright 2012, Google Inc.
10 // Licensed under the Apache License, Version 2.0 (the "License");
11 // you may not use this file except in compliance with the License.
12 // You may obtain a copy of the License at
13 // http://www.apache.org/licenses/LICENSE-2.0
14 // Unless required by applicable law or agreed to in writing, software
15 // distributed under the License is distributed on an "AS IS" BASIS,
16 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 // See the License for the specific language governing permissions and
18 // limitations under the License.
19 //
21 
22 #ifndef TESSERACT_WORDREC_LANGUAGE_MODEL_DEFS_H_
23 #define TESSERACT_WORDREC_LANGUAGE_MODEL_DEFS_H_
24 
25 #include "associate.h"
26 #include "elst.h"
27 #include "dawg.h"
28 #include "lm_consistency.h"
29 #include "matrix.h"
30 #include "ratngs.h"
31 #include "stopper.h"
32 #include "strngs.h"
33 
34 namespace tesseract {
35 
36 // Used for expressing various language model flags.
37 typedef unsigned char LanguageModelFlagsType;
38 
39 // The following structs are used for storing the state of the language model
40 // in the segmentation search graph. In this graph the nodes are BLOB_CHOICEs
41 // and the links are the relationships between the underlying blobs (see
42 // segsearch.h for a more detailed description).
43 // Each of the BLOB_CHOICEs contains LanguageModelState struct, which has
44 // a list of N best paths (list of ViterbiStateEntry) explored by the Viterbi
45 // search leading up to and including this BLOB_CHOICE.
46 // Each ViterbiStateEntry contains information from various components of the
47 // language model: dawgs in which the path is found, character ngram model
48 // probability of the path, script/chartype/font consistency info, state for
49 // language-specific heuristics (e.g. hyphenated and compound words, lower/upper
50 // case preferences, etc).
51 // Each ViterbiStateEntry also contains the parent pointer, so that the path
52 // that it represents (WERD_CHOICE) can be constructed by following these
53 // parent pointers.
54 
55 // Struct for storing additional information used by Dawg language model
56 // component. It stores the set of active dawgs in which the sequence of
57 // letters on a path can be found.
61  }
63  delete active_dawgs;
64  }
67 };
68 
69 // Struct for storing additional information used by Ngram language model
70 // component.
72  LanguageModelNgramInfo(const char *c, int l, bool p, float nc, float ncc)
75  STRING context; // context string
76  // Length of the context measured by advancing using UNICHAR::utf8_step()
77  // (should be at most the order of the character ngram model used).
79  // The paths with pruned set are pruned out from the perspective of the
80  // character ngram model. They are explored further because they represent
81  // a dictionary match or a top choice. Thus ngram_info is still computed
82  // for them in order to calculate the combined cost.
83  bool pruned;
84  // -ln(P_ngram_model(path))
85  float ngram_cost;
86  // -[ ln(P_classifier(path)) + scale_factor * ln(P_ngram_model(path)) ]
88 };
89 
90 // Struct for storing the information about a path in the segmentation graph
91 // explored by Viterbi search.
92 struct ViterbiStateEntry : public ELIST_LINK {
94  BLOB_CHOICE *b, float c, float ol,
95  const LMConsistencyInfo &ci,
96  const AssociateStats &as,
100  const char *debug_uch)
101  : cost(c), curr_b(b), parent_vse(pe), competing_vse(NULL),
102  ratings_sum(b->rating()),
103  min_certainty(b->certainty()), adapted(b->IsAdapted()), length(1),
106  updated(true) {
107  debug_str = (debug_uch == NULL) ? NULL : new STRING();
108  if (pe != NULL) {
109  ratings_sum += pe->ratings_sum;
110  if (pe->min_certainty < min_certainty) {
112  }
113  adapted += pe->adapted;
114  length += pe->length;
116  if (debug_uch != NULL) *debug_str += *(pe->debug_str);
117  }
118  if (debug_str != NULL && debug_uch != NULL) *debug_str += debug_uch;
119  }
121  delete dawg_info;
122  delete ngram_info;
123  delete debug_str;
124  }
125  // Comparator function for sorting ViterbiStateEntry_LISTs in
126  // non-increasing order of costs.
127  static int Compare(const void *e1, const void *e2) {
128  const ViterbiStateEntry *ve1 =
129  *reinterpret_cast<const ViterbiStateEntry * const *>(e1);
130  const ViterbiStateEntry *ve2 =
131  *reinterpret_cast<const ViterbiStateEntry * const *>(e2);
132  return (ve1->cost < ve2->cost) ? -1 : 1;
133  }
134  inline bool Consistent() const {
136  return true;
137  }
138  return consistency_info.Consistent();
139  }
140  // Returns true if this VSE has an alphanumeric character as its classifier
141  // result.
142  bool HasAlnumChoice(const UNICHARSET& unicharset) {
143  if (curr_b == NULL) return false;
144  UNICHAR_ID unichar_id = curr_b->unichar_id();
145  if (unicharset.get_isalpha(unichar_id) ||
146  unicharset.get_isdigit(unichar_id))
147  return true;
148  return false;
149  }
150  void Print(const char *msg) const;
151 
152  // The cost is an adjusted ratings sum, that is adjusted by all the language
153  // model components that use Viterbi search.
154  float cost;
155 
156  // Pointers to BLOB_CHOICE and parent ViterbiStateEntry (not owned by this).
159  // Pointer to a case-competing ViterbiStateEntry in the same list that
160  // represents a path ending in the same letter of the opposite case.
162 
163  // Various information about the characters on the path represented
164  // by this ViterbiStateEntry.
165  float ratings_sum; // sum of ratings of character on the path
166  float min_certainty; // minimum certainty on the path
167  int adapted; // number of BLOB_CHOICES from adapted templates
168  int length; // number of characters on the path
169  float outline_length; // length of the outline so far
170  LMConsistencyInfo consistency_info; // path consistency info
171  AssociateStats associate_stats; // character widths/gaps/seams
172 
173  // Flags for marking the entry as a top choice path with
174  // the smallest rating or lower/upper case letters).
176 
177  // Extra information maintained by Dawg laguage model component
178  // (owned by ViterbiStateEntry).
180 
181  // Extra information maintained by Ngram laguage model component
182  // (owned by ViterbiStateEntry).
184 
185  bool updated; // set to true if the entry has just been created/updated
186  // UTF8 string representing the path corresponding to this vse.
187  // Populated only in when language_model_debug_level > 0.
189 };
190 
192 
193 // Struct to store information maintained by various language model components.
200 
201  // Clears the viterbi search state back to its initial conditions.
202  void Clear();
203 
204  void Print(const char *msg);
205 
206  // Storage for the Viterbi state.
207  ViterbiStateEntry_LIST viterbi_state_entries;
208  // Number and max cost of prunable paths in viterbi_state_entries.
211  // Total number of entries in viterbi_state_entries.
213 };
214 
215 // Bundle together all the things pertaining to the best choice/state.
217  explicit BestChoiceBundle(int matrix_dimension)
218  : updated(false), best_vse(NULL) {
219  beam.reserve(matrix_dimension);
220  for (int i = 0; i < matrix_dimension; ++i)
221  beam.push_back(new LanguageModelState);
222  }
224 
225  // Flag to indicate whether anything was changed.
226  bool updated;
227  // Places to try to fix the word suggested by ambiguity checking.
229  // The beam. One LanguageModelState containing a list of ViterbiStateEntry per
230  // row in the ratings matrix containing all VSEs whose BLOB_CHOICE is
231  // somewhere in the corresponding row.
233  // Best ViterbiStateEntry and BLOB_CHOICE.
235 };
236 
237 } // namespace tesseract
238 
239 #endif // TESSERACT_WORDREC_LANGUAGE_MODEL_DEFS_H_
unsigned char LanguageModelFlagsType
Definition: lm_state.h:37
LMConsistencyInfo consistency_info
Definition: lm_state.h:170
ViterbiStateEntry * best_vse
Definition: lm_state.h:234
ViterbiStateEntry(ViterbiStateEntry *pe, BLOB_CHOICE *b, float c, float ol, const LMConsistencyInfo &ci, const AssociateStats &as, LanguageModelFlagsType tcf, LanguageModelDawgInfo *d, LanguageModelNgramInfo *n, const char *debug_uch)
Definition: lm_state.h:93
ELISTIZEH(AmbigSpec)
PermuterType
Definition: ratngs.h:240
void Print(const char *msg)
Definition: lm_state.cpp:70
LanguageModelNgramInfo * ngram_info
Definition: lm_state.h:183
LanguageModelDawgInfo * dawg_info
Definition: lm_state.h:179
LanguageModelDawgInfo(DawgPositionVector *a, PermuterType pt)
Definition: lm_state.h:59
ViterbiStateEntry * parent_vse
Definition: lm_state.h:158
bool get_isdigit(UNICHAR_ID unichar_id) const
Definition: unicharset.h:470
LanguageModelFlagsType top_choice_flags
Definition: lm_state.h:175
bool Consistent() const
Definition: lm_state.h:134
bool HasAlnumChoice(const UNICHARSET &unicharset)
Definition: lm_state.h:142
AssociateStats associate_stats
Definition: lm_state.h:171
ParamsEditor * pe
Definition: pgedit.cpp:108
DawgPositionVector * active_dawgs
Definition: lm_state.h:65
LanguageModelNgramInfo(const char *c, int l, bool p, float nc, float ncc)
Definition: lm_state.h:72
int UNICHAR_ID
Definition: unichar.h:33
static int Compare(const void *e1, const void *e2)
Definition: lm_state.h:127
float viterbi_state_entries_prunable_max_cost
Definition: lm_state.h:210
PointerVector< LanguageModelState > beam
Definition: lm_state.h:232
ViterbiStateEntry * competing_vse
Definition: lm_state.h:161
bool get_isalpha(UNICHAR_ID unichar_id) const
Definition: unicharset.h:449
#define MAX_FLOAT32
Definition: host.h:124
Definition: strngs.h:44
void Print(const char *msg) const
Definition: lm_state.cpp:27
#define NULL
Definition: host.h:144
ViterbiStateEntry_LIST viterbi_state_entries
Definition: lm_state.h:207
BestChoiceBundle(int matrix_dimension)
Definition: lm_state.h:217
UNICHAR_ID unichar_id() const
Definition: ratngs.h:76