Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
intmatcher.h
Go to the documentation of this file.
1 /******************************************************************************
2  ** Filename: intmatcher.h
3  ** Purpose: Interface to high level generic classifier routines.
4  ** Author: Robert Moss
5  ** History: Wed Feb 13 15:24:15 MST 1991, RWM, Created.
6  **
7  ** (c) Copyright Hewlett-Packard Company, 1988.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  ******************************************************************************/
18 #ifndef INTMATCHER_H
19 #define INTMATCHER_H
20 
21 #include "params.h"
22 
23 // Character fragments could be present in the trained templates
24 // but turned on/off on a language-by-language basis or depending
25 // on particular properties of the corpus (e.g. when we expect the
26 // images to have low exposure).
27 extern BOOL_VAR_H(disable_character_fragments, FALSE,
28  "Do not include character fragments in the"
29  " results of the classifier");
30 
31 extern INT_VAR_H(classify_integer_matcher_multiplier, 14,
32  "Integer Matcher Multiplier 0-255: ");
33 
34 
38 #include "intproto.h"
39 #include "cutoffs.h"
40 
46 };
47 
49 
50 
55 };
56 
58 
59 /*----------------------------------------------------------------------------
60  Variables
61 -----------------------------------------------------------------------------*/
62 
63 extern INT_VAR_H(classify_adapt_proto_thresh, 230,
64  "Threshold for good protos during adaptive 0-255: ");
65 
66 extern INT_VAR_H(classify_adapt_feature_thresh, 230,
67  "Threshold for good features during adaptive 0-255: ");
68 
// Dimensions of the similarity-to-evidence lookup table.
// SE_TABLE_SIZE must stay equal to (1 << SE_TABLE_BITS): 512 == 2^9.
// SE_TABLE_SIZE is the array length of
// IntegerMatcher::similarity_evidence_table_ declared further down.
// NOTE(review): IntegerMatcher::kEvidenceTableBits is also 9; presumably the
// two are meant to agree -- confirm before changing either.
73 #define SE_TABLE_BITS 9
74 #define SE_TABLE_SIZE 512
75 
80 
81  void Clear(const INT_CLASS class_template);
82  void ClearFeatureEvidence(const INT_CLASS class_template);
83  void NormalizeSums(INT_CLASS ClassTemplate, inT16 NumFeatures,
84  inT32 used_features);
86  INT_CLASS ClassTemplate, BIT_VECTOR ConfigMask, inT16 NumFeatures);
87 };
88 
89 
91  public:
92  // Integer Matcher Theta Fudge (0-255).
93  static const int kIntThetaFudge = 128;
94  // Bits in Similarity to Evidence Lookup (8-9).
95  static const int kEvidenceTableBits = 9;
96  // Integer Evidence Truncation Bits (8-14).
97  static const int kIntEvidenceTruncBits = 14;
98  // Similarity to Evidence Table Exponential Multiplier.
99  static const float kSEExponentialMultiplier;
100  // Center of Similarity Curve.
101  static const float kSimilarityCenter;
102 
103  IntegerMatcher() : classify_debug_level_(0) {}
104 
105  void Init(tesseract::IntParam *classify_debug_level,
106  int classify_integer_matcher_multiplier);
107 
108  void SetBaseLineMatch();
109  void SetCharNormMatch(int integer_matcher_multiplier);
110 
111  void Match(INT_CLASS ClassTemplate,
112  BIT_VECTOR ProtoMask,
113  BIT_VECTOR ConfigMask,
114  inT16 NumFeatures,
115  const INT_FEATURE_STRUCT* Features,
116  INT_RESULT Result,
117  int AdaptFeatureThreshold,
118  int Debug,
119  bool SeparateDebugWindows);
120 
121  // Applies the CN normalization factor to the given rating and returns
122  // the modified rating.
123  float ApplyCNCorrection(float rating, int blob_length,
124  int normalization_factor);
125 
126  int FindGoodProtos(INT_CLASS ClassTemplate,
127  BIT_VECTOR ProtoMask,
128  BIT_VECTOR ConfigMask,
129  uinT16 BlobLength,
130  inT16 NumFeatures,
131  INT_FEATURE_ARRAY Features,
132  PROTO_ID *ProtoArray,
133  int AdaptProtoThreshold,
134  int Debug);
135 
136  int FindBadFeatures(INT_CLASS ClassTemplate,
137  BIT_VECTOR ProtoMask,
138  BIT_VECTOR ConfigMask,
139  uinT16 BlobLength,
140  inT16 NumFeatures,
141  INT_FEATURE_ARRAY Features,
142  FEATURE_ID *FeatureArray,
143  int AdaptFeatureThreshold,
144  int Debug);
145 
146  private:
147  int UpdateTablesForFeature(
148  INT_CLASS ClassTemplate,
149  BIT_VECTOR ProtoMask,
150  BIT_VECTOR ConfigMask,
151  int FeatureNum,
152  const INT_FEATURE_STRUCT* Feature,
153  ScratchEvidence *evidence,
154  int Debug);
155 
156  int FindBestMatch(INT_CLASS ClassTemplate,
157  const ScratchEvidence &tables,
158  INT_RESULT Result);
159 
160 #ifndef GRAPHICS_DISABLED
161  void DebugFeatureProtoError(
162  INT_CLASS ClassTemplate,
163  BIT_VECTOR ProtoMask,
164  BIT_VECTOR ConfigMask,
165  const ScratchEvidence &tables,
166  inT16 NumFeatures,
167  int Debug);
168 
169  void DisplayProtoDebugInfo(
170  INT_CLASS ClassTemplate,
171  BIT_VECTOR ProtoMask,
172  BIT_VECTOR ConfigMask,
173  const ScratchEvidence &tables,
174  bool SeparateDebugWindows);
175 
176  void DisplayFeatureDebugInfo(
177  INT_CLASS ClassTemplate,
178  BIT_VECTOR ProtoMask,
179  BIT_VECTOR ConfigMask,
180  inT16 NumFeatures,
181  const INT_FEATURE_STRUCT* Features,
182  int AdaptFeatureThreshold,
183  int Debug,
184  bool SeparateDebugWindows);
185 
186  void DebugBestMatch(int BestMatch, INT_RESULT Result);
187 #endif
188 
189 
190  private:
191  uinT8 similarity_evidence_table_[SE_TABLE_SIZE];
192  uinT32 evidence_table_mask_;
193  uinT32 mult_trunc_shift_bits_;
194  uinT32 table_trunc_shift_bits_;
195  inT16 local_matcher_multiplier_;
196  tesseract::IntParam *classify_debug_level_;
197  uinT32 evidence_mult_mask_;
198 };
199 
// Free-function debug helper; its definition is not part of this header.
// Judging by the parameter names it reports the evidence one proto
// contributed for one feature, together with the config mask/word involved --
// confirm against the definition.
// NOTE(review): FeatureNum is typed INT_FEATURE here, whereas
// IntegerMatcher::UpdateTablesForFeature takes "int FeatureNum"; verify that
// this declaration matches the definition's signature.
203 void IMDebugConfiguration(INT_FEATURE FeatureNum,
204  uinT16 ActualProtoNum,
205  uinT8 Evidence,
206  BIT_VECTOR ConfigMask,
207  uinT32 ConfigWord);
208 
// Free-function debug helper; its definition is not part of this header.
// FeatureEvidence is a pointer to uinT8 values and ConfigCount an inT32 count;
// presumably ConfigCount is the number of entries read from FeatureEvidence --
// confirm against the definition.
// NOTE(review): FeatureNum is typed INT_FEATURE here, whereas
// IntegerMatcher::UpdateTablesForFeature takes "int FeatureNum"; verify that
// this declaration matches the definition's signature.
209 void IMDebugConfigurationSum(INT_FEATURE FeatureNum,
210  uinT8 *FeatureEvidence,
211  inT32 ConfigCount);
212 
// Declares HeapSort over two int arrays, ra and rb, with an element count n.
// Presumably the arrays are parallel and sorted together -- confirm against
// the definition, which is not visible here.
// NOTE(review): the "register" storage-class specifier on ra and rb was
// deprecated in C++11 and removed in C++17, so this declaration is rejected
// by compilers in C++17 mode. Dropping the keyword does not change the
// function's type, so callers are unaffected.
213 void HeapSort (int n, register int ra[], register int rb[]);
214 
218 #endif