tesseract  3.04.00
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
tesseract::Classify Class Reference

#include <classify.h>

Inheritance diagram for tesseract::Classify:
tesseract::CCStruct tesseract::CUtil tesseract::CCUtil tesseract::Wordrec tesseract::Tesseract

Public Member Functions

 Classify ()
 
virtual ~Classify ()
 
Dict & getDict ()
 
const ShapeTable * shape_table () const
 
void SetStaticClassifier (ShapeClassifier *static_classifier)
 
void AddLargeSpeckleTo (int blob_length, BLOB_CHOICE_LIST *choices)
 
bool LargeSpeckle (const TBLOB &blob)
 
ADAPT_TEMPLATES NewAdaptedTemplates (bool InitFromUnicharset)
 
int GetFontinfoId (ADAPT_CLASS Class, uinT8 ConfigId)
 
int PruneClasses (const INT_TEMPLATES_STRUCT *int_templates, int num_features, int keep_this, const INT_FEATURE_STRUCT *features, const uinT8 *normalization_factors, const uinT16 *expected_num_features, GenericVector< CP_RESULT_STRUCT > *results)
 
void ReadNewCutoffs (FILE *CutoffFile, bool swap, inT64 end_offset, CLASS_CUTOFF_ARRAY Cutoffs)
 
void PrintAdaptedTemplates (FILE *File, ADAPT_TEMPLATES Templates)
 
void WriteAdaptedTemplates (FILE *File, ADAPT_TEMPLATES Templates)
 
ADAPT_TEMPLATES ReadAdaptedTemplates (FILE *File)
 
FLOAT32 ComputeNormMatch (CLASS_ID ClassId, const FEATURE_STRUCT &feature, BOOL8 DebugMatch)
 
void FreeNormProtos ()
 
NORM_PROTOS * ReadNormProtos (FILE *File, inT64 end_offset)
 
void ConvertProto (PROTO Proto, int ProtoId, INT_CLASS Class)
 
INT_TEMPLATES CreateIntTemplates (CLASSES FloatProtos, const UNICHARSET &target_unicharset)
 
void LearnWord (const char *fontname, WERD_RES *word)
 
void LearnPieces (const char *fontname, int start, int length, float threshold, CharSegmentationType segmentation, const char *correct_text, WERD_RES *word)
 
void InitAdaptiveClassifier (bool load_pre_trained_templates)
 
void InitAdaptedClass (TBLOB *Blob, CLASS_ID ClassId, int FontinfoId, ADAPT_CLASS Class, ADAPT_TEMPLATES Templates)
 
void AmbigClassifier (const GenericVector< INT_FEATURE_STRUCT > &int_features, const INT_FX_RESULT_STRUCT &fx_info, const TBLOB *blob, INT_TEMPLATES templates, ADAPT_CLASS *classes, UNICHAR_ID *ambiguities, ADAPT_RESULTS *results)
 
void MasterMatcher (INT_TEMPLATES templates, inT16 num_features, const INT_FEATURE_STRUCT *features, const uinT8 *norm_factors, ADAPT_CLASS *classes, int debug, int matcher_multiplier, const TBOX &blob_box, const GenericVector< CP_RESULT_STRUCT > &results, ADAPT_RESULTS *final_results)
 
void ExpandShapesAndApplyCorrections (ADAPT_CLASS *classes, bool debug, int class_id, int bottom, int top, float cp_rating, int blob_length, int matcher_multiplier, const uinT8 *cn_factors, UnicharRating *int_result, ADAPT_RESULTS *final_results)
 
double ComputeCorrectedRating (bool debug, int unichar_id, double cp_rating, double im_rating, int feature_misses, int bottom, int top, int blob_length, int matcher_multiplier, const uinT8 *cn_factors)
 
void ConvertMatchesToChoices (const DENORM &denorm, const TBOX &box, ADAPT_RESULTS *Results, BLOB_CHOICE_LIST *Choices)
 
void AddNewResult (const UnicharRating &new_result, ADAPT_RESULTS *results)
 
int GetAdaptiveFeatures (TBLOB *Blob, INT_FEATURE_ARRAY IntFeatures, FEATURE_SET *FloatFeatures)
 
void DebugAdaptiveClassifier (TBLOB *Blob, ADAPT_RESULTS *Results)
 
PROTO_ID MakeNewTempProtos (FEATURE_SET Features, int NumBadFeat, FEATURE_ID BadFeat[], INT_CLASS IClass, ADAPT_CLASS Class, BIT_VECTOR TempProtoMask)
 
int MakeNewTemporaryConfig (ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int FontinfoId, int NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_SET FloatFeatures)
 
void MakePermanent (ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int ConfigId, TBLOB *Blob)
 
void PrintAdaptiveMatchResults (const ADAPT_RESULTS &results)
 
void RemoveExtraPuncs (ADAPT_RESULTS *Results)
 
void RemoveBadMatches (ADAPT_RESULTS *Results)
 
void SetAdaptiveThreshold (FLOAT32 Threshold)
 
void ShowBestMatchFor (int shape_id, const INT_FEATURE_STRUCT *features, int num_features)
 
STRING ClassIDToDebugStr (const INT_TEMPLATES_STRUCT *templates, int class_id, int config_id) const
 
int ClassAndConfigIDToFontOrShapeID (int class_id, int int_result_config) const
 
int ShapeIDToClassID (int shape_id) const
 
UNICHAR_ID * BaselineClassifier (TBLOB *Blob, const GenericVector< INT_FEATURE_STRUCT > &int_features, const INT_FX_RESULT_STRUCT &fx_info, ADAPT_TEMPLATES Templates, ADAPT_RESULTS *Results)
 
int CharNormClassifier (TBLOB *blob, const TrainingSample &sample, ADAPT_RESULTS *adapt_results)
 
int CharNormTrainingSample (bool pruner_only, int keep_this, const TrainingSample &sample, GenericVector< UnicharRating > *results)
 
UNICHAR_ID * GetAmbiguities (TBLOB *Blob, CLASS_ID CorrectClass)
 
void DoAdaptiveMatch (TBLOB *Blob, ADAPT_RESULTS *Results)
 
void AdaptToChar (TBLOB *Blob, CLASS_ID ClassId, int FontinfoId, FLOAT32 Threshold, ADAPT_TEMPLATES adaptive_templates)
 
void DisplayAdaptedChar (TBLOB *blob, INT_CLASS_STRUCT *int_class)
 
bool AdaptableWord (WERD_RES *word)
 
void EndAdaptiveClassifier ()
 
void SettupPass1 ()
 
void SettupPass2 ()
 
void AdaptiveClassifier (TBLOB *Blob, BLOB_CHOICE_LIST *Choices)
 
void ClassifyAsNoise (ADAPT_RESULTS *Results)
 
void ResetAdaptiveClassifierInternal ()
 
void SwitchAdaptiveClassifier ()
 
void StartBackupAdaptiveClassifier ()
 
int GetCharNormFeature (const INT_FX_RESULT_STRUCT &fx_info, INT_TEMPLATES templates, uinT8 *pruner_norm_array, uinT8 *char_norm_array)
 
void ComputeCharNormArrays (FEATURE_STRUCT *norm_feature, INT_TEMPLATES_STRUCT *templates, uinT8 *char_norm_array, uinT8 *pruner_array)
 
bool TempConfigReliable (CLASS_ID class_id, const TEMP_CONFIG &config)
 
void UpdateAmbigsGroup (CLASS_ID class_id, TBLOB *Blob)
 
bool AdaptiveClassifierIsFull () const
 
bool AdaptiveClassifierIsEmpty () const
 
bool LooksLikeGarbage (TBLOB *blob)
 
void RefreshDebugWindow (ScrollView **win, const char *msg, int y_offset, const TBOX &wbox)
 
void ClearCharNormArray (uinT8 *char_norm_array)
 
void ComputeIntCharNormArray (const FEATURE_STRUCT &norm_feature, uinT8 *char_norm_array)
 
void ComputeIntFeatures (FEATURE_SET Features, INT_FEATURE_ARRAY IntFeatures)
 
INT_TEMPLATES ReadIntTemplates (FILE *File)
 
void WriteIntTemplates (FILE *File, INT_TEMPLATES Templates, const UNICHARSET &target_unicharset)
 
CLASS_ID GetClassToDebug (const char *Prompt, bool *adaptive_on, bool *pretrained_on, int *shape_id)
 
void ShowMatchDisplay ()
 
UnicityTable< FontInfo > & get_fontinfo_table ()
 
const UnicityTable< FontInfo > & get_fontinfo_table () const
 
UnicityTable< FontSet > & get_fontset_table ()
 
void NormalizeOutlines (LIST Outlines, FLOAT32 *XScale, FLOAT32 *YScale)
 
FEATURE_SET ExtractOutlineFeatures (TBLOB *Blob)
 
FEATURE_SET ExtractPicoFeatures (TBLOB *Blob)
 
FEATURE_SET ExtractIntCNFeatures (const TBLOB &blob, const INT_FX_RESULT_STRUCT &fx_info)
 
FEATURE_SET ExtractIntGeoFeatures (const TBLOB &blob, const INT_FX_RESULT_STRUCT &fx_info)
 
void LearnBlob (const STRING &fontname, TBLOB *Blob, const DENORM &cn_denorm, const INT_FX_RESULT_STRUCT &fx_info, const char *blob_text)
 
bool WriteTRFile (const STRING &filename)
 
- Public Member Functions inherited from tesseract::CCStruct
 CCStruct ()
 
 ~CCStruct ()
 
- Public Member Functions inherited from tesseract::CUtil
 CUtil ()
 
 ~CUtil ()
 
void read_variables (const char *filename, bool global_only)
 
- Public Member Functions inherited from tesseract::CCUtil
 CCUtil ()
 
virtual ~CCUtil ()
 
void main_setup (const char *argv0, const char *basename)
 CCUtil::main_setup - set location of tessdata and name of image. More...
 
ParamsVectors * params ()
 

Static Public Member Functions

static void SetupBLCNDenorms (const TBLOB &blob, bool nonlinear_norm, DENORM *bl_denorm, DENORM *cn_denorm, INT_FX_RESULT_STRUCT *fx_info)
 
static void ExtractFeatures (const TBLOB &blob, bool nonlinear_norm, GenericVector< INT_FEATURE_STRUCT > *bl_features, GenericVector< INT_FEATURE_STRUCT > *cn_features, INT_FX_RESULT_STRUCT *results, GenericVector< int > *outline_cn_counts)
 

Public Attributes

bool allow_blob_division = true
 
bool prioritize_division = FALSE
 
int tessedit_single_match = FALSE
 
bool classify_enable_learning = true
 
int classify_debug_level = 0
 
int classify_norm_method = character
 
double classify_char_norm_range = 0.2
 
double classify_min_norm_scale_x = 0.0
 
double classify_max_norm_scale_x = 0.325
 
double classify_min_norm_scale_y = 0.0
 
double classify_max_norm_scale_y = 0.325
 
double classify_max_rating_ratio = 1.5
 
double classify_max_certainty_margin = 5.5
 
bool tess_cn_matching = 0
 
bool tess_bn_matching = 0
 
bool classify_enable_adaptive_matcher = 1
 
bool classify_use_pre_adapted_templates = 0
 
bool classify_save_adapted_templates = 0
 
bool classify_enable_adaptive_debugger = 0
 
bool classify_nonlinear_norm = 0
 
int matcher_debug_level = 0
 
int matcher_debug_flags = 0
 
int classify_learning_debug_level = 0
 
double matcher_good_threshold = 0.125
 
double matcher_reliable_adaptive_result = 0.0
 
double matcher_perfect_threshold = 0.02
 
double matcher_bad_match_pad = 0.15
 
double matcher_rating_margin = 0.1
 
double matcher_avg_noise_size = 12.0
 
int matcher_permanent_classes_min = 1
 
int matcher_min_examples_for_prototyping = 3
 
int matcher_sufficient_examples_for_prototyping = 5
 
double matcher_clustering_max_angle_delta = 0.015
 
double classify_misfit_junk_penalty = 0.0
 
double rating_scale = 1.5
 
double certainty_scale = 20.0
 
double tessedit_class_miss_scale = 0.00390625
 
double classify_adapted_pruning_factor = 2.5
 
double classify_adapted_pruning_threshold = -1.0
 
int classify_adapt_proto_threshold = 230
 
int classify_adapt_feature_threshold = 230
 
bool disable_character_fragments = TRUE
 
double classify_character_fragments_garbage_certainty_threshold = -3.0
 
bool classify_debug_character_fragments = FALSE
 
bool matcher_debug_separate_windows = FALSE
 
char * classify_learn_debug_str = ""
 
int classify_class_pruner_threshold = 229
 
int classify_class_pruner_multiplier = 15
 
int classify_cp_cutoff_strength = 7
 
int classify_integer_matcher_multiplier = 10
 
INT_TEMPLATES PreTrainedTemplates
 
ADAPT_TEMPLATES AdaptedTemplates
 
ADAPT_TEMPLATES BackupAdaptedTemplates
 
BIT_VECTOR AllProtosOn
 
BIT_VECTOR AllConfigsOn
 
BIT_VECTOR AllConfigsOff
 
BIT_VECTOR TempProtoMask
 
bool EnableLearning
 
NORM_PROTOS * NormProtos
 
UnicityTable< FontInfo > fontinfo_table_
 
UnicityTable< FontSet > fontset_table_
 
int il1_adaption_test = 0
 
bool classify_bln_numeric_mode = 0
 
double speckle_large_max_size = 0.30
 
double speckle_rating_penalty = 10.0
 
- Public Attributes inherited from tesseract::CCUtil
STRING datadir
 
STRING imagebasename
 
STRING lang
 
STRING language_data_path_prefix
 
TessdataManager tessdata_manager
 
UNICHARSET unicharset
 
UnicharAmbigs unichar_ambigs
 
STRING imagefile
 
STRING directory
 
char * m_data_sub_dir = "tessdata/"
 
int ambigs_debug_level = 0
 
bool use_definite_ambigs_for_classifier = 0
 
bool use_ambigs_for_adaption = 0
 

Protected Attributes

IntegerMatcher im_
 
FEATURE_DEFS_STRUCT feature_defs_
 
ShapeTable * shape_table_
 

Additional Inherited Members

- Static Public Attributes inherited from tesseract::CCStruct
static const double kDescenderFraction = 0.25
 
static const double kXHeightFraction = 0.5
 
static const double kAscenderFraction = 0.25
 
static const double kXHeightCapRatio
 

Detailed Description

Definition at line 61 of file classify.h.

Constructor & Destructor Documentation

tesseract::Classify::Classify ( )

Definition at line 35 of file classify.cpp.

36  : BOOL_MEMBER(allow_blob_division, true, "Use divisible blobs chopping",
37  this->params()),
39  "Prioritize blob division over chopping", this->params()),
40  INT_MEMBER(tessedit_single_match, FALSE, "Top choice only from CP",
41  this->params()),
42  BOOL_MEMBER(classify_enable_learning, true, "Enable adaptive classifier",
43  this->params()),
44  INT_MEMBER(classify_debug_level, 0, "Classify debug level",
45  this->params()),
46  INT_MEMBER(classify_norm_method, character, "Normalization Method ...",
47  this->params()),
49  "Character Normalization Range ...", this->params()),
50  double_MEMBER(classify_min_norm_scale_x, 0.0, "Min char x-norm scale ...",
51  this->params()), /* PREV DEFAULT 0.1 */
53  "Max char x-norm scale ...",
54  this->params()), /* PREV DEFAULT 0.3 */
55  double_MEMBER(classify_min_norm_scale_y, 0.0, "Min char y-norm scale ...",
56  this->params()), /* PREV DEFAULT 0.1 */
58  "Max char y-norm scale ...",
59  this->params()), /* PREV DEFAULT 0.3 */
61  "Veto ratio between classifier ratings", this->params()),
63  "Veto difference between classifier certainties",
64  this->params()),
65  BOOL_MEMBER(tess_cn_matching, 0, "Character Normalized Matching",
66  this->params()),
67  BOOL_MEMBER(tess_bn_matching, 0, "Baseline Normalized Matching",
68  this->params()),
70  "Enable adaptive classifier", this->params()),
72  "Use pre-adapted classifier templates", this->params()),
74  "Save adapted templates to a file", this->params()),
75  BOOL_MEMBER(classify_enable_adaptive_debugger, 0, "Enable match debugger",
76  this->params()),
78  "Non-linear stroke-density normalization", this->params()),
79  INT_MEMBER(matcher_debug_level, 0, "Matcher Debug Level", this->params()),
80  INT_MEMBER(matcher_debug_flags, 0, "Matcher Debug Flags", this->params()),
81  INT_MEMBER(classify_learning_debug_level, 0, "Learning Debug Level: ",
82  this->params()),
83  double_MEMBER(matcher_good_threshold, 0.125, "Good Match (0-1)",
84  this->params()),
85  double_MEMBER(matcher_reliable_adaptive_result, 0.0, "Great Match (0-1)",
86  this->params()),
87  double_MEMBER(matcher_perfect_threshold, 0.02, "Perfect Match (0-1)",
88  this->params()),
89  double_MEMBER(matcher_bad_match_pad, 0.15, "Bad Match Pad (0-1)",
90  this->params()),
91  double_MEMBER(matcher_rating_margin, 0.1, "New template margin (0-1)",
92  this->params()),
93  double_MEMBER(matcher_avg_noise_size, 12.0, "Avg. noise blob length",
94  this->params()),
95  INT_MEMBER(matcher_permanent_classes_min, 1, "Min # of permanent classes",
96  this->params()),
98  "Reliable Config Threshold", this->params()),
100  "Enable adaption even if the ambiguities have not been seen",
101  this->params()),
103  "Maximum angle delta for prototype clustering",
104  this->params()),
106  "Penalty to apply when a non-alnum is vertically out of "
107  "its expected textline position",
108  this->params()),
109  double_MEMBER(rating_scale, 1.5, "Rating scaling factor", this->params()),
110  double_MEMBER(certainty_scale, 20.0, "Certainty scaling factor",
111  this->params()),
113  "Scale factor for features not used", this->params()),
116  "Prune poor adapted results this much worse than best result",
117  this->params()),
119  "Threshold at which classify_adapted_pruning_factor starts",
120  this->params()),
122  "Threshold for good protos during adaptive 0-255",
123  this->params()),
125  "Threshold for good features during adaptive 0-255",
126  this->params()),
128  "Do not include character fragments in the"
129  " results of the classifier",
130  this->params()),
132  -3.0,
133  "Exclude fragments that do not look like whole"
134  " characters from training and adaption",
135  this->params()),
137  "Bring up graphical debugging windows for fragments training",
138  this->params()),
140  "Use two different windows for debugging the matching: "
141  "One for the protos and one for the features.",
142  this->params()),
143  STRING_MEMBER(classify_learn_debug_str, "", "Class str to debug learning",
144  this->params()),
146  "Class Pruner Threshold 0-255", this->params()),
148  "Class Pruner Multiplier 0-255: ", this->params()),
150  "Class Pruner CutoffStrength: ", this->params()),
152  "Integer Matcher Multiplier 0-255: ", this->params()),
153  EnableLearning(true),
154  INT_MEMBER(il1_adaption_test, 0, "Dont adapt to i/I at beginning of word",
155  this->params()),
157  "Assume the input is numbers [0-9].", this->params()),
158  double_MEMBER(speckle_large_max_size, 0.30, "Max large speckle size",
159  this->params()),
161  "Penalty to add to worst rating for noise", this->params()),
163  dict_(this),
164  static_classifier_(NULL) {
165  fontinfo_table_.set_compare_callback(
167  fontinfo_table_.set_clear_callback(
169  fontset_table_.set_compare_callback(
171  fontset_table_.set_clear_callback(
176  AllProtosOn = NULL;
177  AllConfigsOn = NULL;
180  NormProtos = NULL;
181 
182  NumAdaptationsFailed = 0;
183 
184  learn_debug_win_ = NULL;
185  learn_fragmented_word_debug_win_ = NULL;
186  learn_fragments_debug_win_ = NULL;
187 
188  CharNormCutoffs = new uinT16[MAX_NUM_CLASSES];
189  BaselineCutoffs = new uinT16[MAX_NUM_CLASSES];
190 }
bool matcher_debug_separate_windows
Definition: classify.h:458
ADAPT_TEMPLATES BackupAdaptedTemplates
Definition: classify.h:477
#define STRING_MEMBER(name, val, comment, vec)
Definition: params.h:307
int classify_integer_matcher_multiplier
Definition: classify.h:469
bool classify_bln_numeric_mode
Definition: classify.h:500
bool CompareFontSet(const FontSet &fs1, const FontSet &fs2)
Definition: fontinfo.cpp:128
double classify_min_norm_scale_y
Definition: classify.h:399
bool classify_enable_adaptive_matcher
Definition: classify.h:409
#define MAX_NUM_CLASSES
Definition: matchdefs.h:31
double matcher_reliable_adaptive_result
Definition: classify.h:421
double tessedit_class_miss_scale
Definition: classify.h:439
double matcher_good_threshold
Definition: classify.h:420
INT_TEMPLATES PreTrainedTemplates
Definition: classify.h:469
bool prioritize_division
Definition: classify.h:387
#define BOOL_MEMBER(name, val, comment, vec)
Definition: params.h:304
bool classify_save_adapted_templates
Definition: classify.h:413
double classify_character_fragments_garbage_certainty_threshold
Definition: classify.h:453
double classify_adapted_pruning_factor
Definition: classify.h:441
int matcher_min_examples_for_prototyping
Definition: classify.h:428
UnicityTable< FontInfo > fontinfo_table_
Definition: classify.h:488
double speckle_rating_penalty
Definition: classify.h:503
BIT_VECTOR AllProtosOn
Definition: classify.h:480
int classify_learning_debug_level
Definition: classify.h:419
double matcher_perfect_threshold
Definition: classify.h:422
double matcher_rating_margin
Definition: classify.h:424
bool classify_nonlinear_norm
Definition: classify.h:416
double speckle_large_max_size
Definition: classify.h:501
BIT_VECTOR AllConfigsOff
Definition: classify.h:482
ShapeTable * shape_table_
Definition: classify.h:512
double classify_max_norm_scale_x
Definition: classify.h:398
int classify_adapt_proto_threshold
Definition: classify.h:445
int matcher_permanent_classes_min
Definition: classify.h:426
double certainty_scale
Definition: classify.h:437
char * classify_learn_debug_str
Definition: classify.h:459
int classify_class_pruner_multiplier
Definition: classify.h:465
bool CompareFontInfo(const FontInfo &fi1, const FontInfo &fi2)
Definition: fontinfo.cpp:120
int classify_class_pruner_threshold
Definition: classify.h:463
bool classify_use_pre_adapted_templates
Definition: classify.h:411
double matcher_avg_noise_size
Definition: classify.h:425
void FontSetDeleteCallback(FontSet fs)
Definition: fontinfo.cpp:146
#define INT_MEMBER(name, val, comment, vec)
Definition: params.h:301
_ConstTessMemberResultCallback_0_0< false, R, T1 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)() const)
Definition: tesscallback.h:116
double matcher_bad_match_pad
Definition: classify.h:423
double classify_max_norm_scale_y
Definition: classify.h:400
bool classify_debug_character_fragments
Definition: classify.h:455
ParamsVectors * params()
Definition: ccutil.h:65
double classify_max_rating_ratio
Definition: classify.h:402
double classify_char_norm_range
Definition: classify.h:396
double classify_min_norm_scale_x
Definition: classify.h:397
void FontInfoDeleteCallback(FontInfo f)
Definition: fontinfo.cpp:139
#define FALSE
Definition: capi.h:29
int matcher_sufficient_examples_for_prototyping
Definition: classify.h:430
double classify_adapted_pruning_threshold
Definition: classify.h:443
double classify_max_certainty_margin
Definition: classify.h:404
#define TRUE
Definition: capi.h:28
#define double_MEMBER(name, val, comment, vec)
Definition: params.h:310
UnicityTable< FontSet > fontset_table_
Definition: classify.h:496
bool disable_character_fragments
Definition: classify.h:450
bool classify_enable_learning
Definition: classify.h:389
#define NULL
Definition: host.h:144
BIT_VECTOR TempProtoMask
Definition: classify.h:483
bool allow_blob_division
Definition: classify.h:382
BIT_VECTOR AllConfigsOn
Definition: classify.h:481
bool classify_enable_adaptive_debugger
Definition: classify.h:414
double classify_misfit_junk_penalty
Definition: classify.h:435
double matcher_clustering_max_angle_delta
Definition: classify.h:432
int classify_cp_cutoff_strength
Definition: classify.h:467
ADAPT_TEMPLATES AdaptedTemplates
Definition: classify.h:473
int classify_adapt_feature_threshold
Definition: classify.h:447
unsigned short uinT16
Definition: host.h:101
NORM_PROTOS * NormProtos
Definition: classify.h:486
tesseract::Classify::~Classify ( )
virtual

Definition at line 192 of file classify.cpp.

192  {
194  delete learn_debug_win_;
195  delete learn_fragmented_word_debug_win_;
196  delete learn_fragments_debug_win_;
197  delete[] CharNormCutoffs;
198  delete[] BaselineCutoffs;
199 }
void EndAdaptiveClassifier()
Definition: adaptmatch.cpp:456

Member Function Documentation

bool tesseract::Classify::AdaptableWord ( WERD_RES * word)

Return TRUE if the specified word is acceptable for adaptation.

Globals: none

Parameters
Word: current word
BestChoiceWord: best overall choice for word with context
Returns
TRUE or FALSE
Note
Exceptions: none
History: Thu May 30 14:25:06 1991, DSJ, Created.

Definition at line 851 of file adaptmatch.cpp.

851  {
852  if (word->best_choice == NULL) return false;
853  int BestChoiceLength = word->best_choice->length();
854  float adaptable_score =
856  return // rules that apply in general - simplest to compute first
857  BestChoiceLength > 0 &&
858  BestChoiceLength == word->rebuild_word->NumBlobs() &&
859  BestChoiceLength <= MAX_ADAPTABLE_WERD_SIZE &&
860  // This basically ensures that the word is at least a dictionary match
861  // (freq word, user word, system dawg word, etc).
862  // Since all the other adjustments will make adjust factor higher
863  // than higher than adaptable_score=1.1+0.05=1.15
864  // Since these are other flags that ensure that the word is dict word,
865  // this check could be at times redundant.
866  word->best_choice->adjust_factor() <= adaptable_score &&
867  // Make sure that alternative choices are not dictionary words.
868  word->AlternativeChoiceAdjustmentsWorseThan(adaptable_score);
869 }
int length() const
Definition: ratngs.h:300
WERD_CHOICE * best_choice
Definition: pageres.h:219
double segment_penalty_dict_case_ok
Definition: dict.h:574
#define ADAPTABLE_WERD_ADJUSTMENT
Definition: adaptmatch.cpp:73
#define MAX_ADAPTABLE_WERD_SIZE
Definition: adaptmatch.cpp:71
int NumBlobs() const
Definition: blobs.h:425
TWERD * rebuild_word
Definition: pageres.h:244
Dict & getDict()
Definition: classify.h:65
bool AlternativeChoiceAdjustmentsWorseThan(float threshold) const
Definition: pageres.cpp:430
float adjust_factor() const
Definition: ratngs.h:303
#define NULL
Definition: host.h:144
void tesseract::Classify::AdaptiveClassifier ( TBLOB * Blob,
BLOB_CHOICE_LIST *  Choices 
)

This routine calls the adaptive matcher which returns (in an array) the class id of each class matched.

It also returns the number of classes matched. For each class matched it places the best rating found for that class into the Ratings array.

Bad matches are then removed so that they don't need to be sorted. The remaining good matches are then sorted and converted to choices.

This routine also performs some simple speckle filtering.

Note
Exceptions: none
History: Mon Mar 11 10:00:58 1991, DSJ, Created.
Parameters
Blob: blob to be classified
[out] Choices: List of choices found by adaptive matcher. Filled on return with the choices found by the class pruner and the ratings therefrom. Also contains the detailed results of the integer matcher.

Definition at line 185 of file adaptmatch.cpp.

185  {
186  assert(Choices != NULL);
187  ADAPT_RESULTS *Results = new ADAPT_RESULTS;
188  Results->Initialize();
189 
191 
192  DoAdaptiveMatch(Blob, Results);
193 
194  RemoveBadMatches(Results);
196  RemoveExtraPuncs(Results);
197  Results->ComputeBest();
198  ConvertMatchesToChoices(Blob->denorm(), Blob->bounding_box(), Results,
199  Choices);
200 
201  // TODO(rays) Move to before ConvertMatchesToChoices!
202  if (LargeSpeckle(*Blob) || Choices->length() == 0)
203  AddLargeSpeckleTo(Results->BlobLength, Choices);
204 
205  if (matcher_debug_level >= 1) {
206  tprintf("AD Matches = ");
207  PrintAdaptiveMatchResults(*Results);
208  }
209 
210 #ifndef GRAPHICS_DISABLED
212  DebugAdaptiveClassifier(Blob, Results);
213 #endif
214 
215  delete Results;
216 } /* AdaptiveClassifier */
void DoAdaptiveMatch(TBLOB *Blob, ADAPT_RESULTS *Results)
void AddLargeSpeckleTo(int blob_length, BLOB_CHOICE_LIST *choices)
Definition: classify.cpp:212
inT32 BlobLength
Definition: adaptmatch.cpp:83
GenericVector< UnicharRating > match
Definition: adaptmatch.cpp:88
#define tprintf(...)
Definition: tprintf.h:31
void DebugAdaptiveClassifier(TBLOB *Blob, ADAPT_RESULTS *Results)
bool LargeSpeckle(const TBLOB &blob)
Definition: classify.cpp:235
#define ASSERT_HOST(x)
Definition: errcode.h:84
void ComputeBest()
Definition: adaptmatch.cpp:99
static int SortDescendingRating(const void *t1, const void *t2)
Definition: shapetable.h:56
void RemoveExtraPuncs(ADAPT_RESULTS *Results)
void RemoveBadMatches(ADAPT_RESULTS *Results)
void ConvertMatchesToChoices(const DENORM &denorm, const TBOX &box, ADAPT_RESULTS *Results, BLOB_CHOICE_LIST *Choices)
const DENORM & denorm() const
Definition: blobs.h:340
void Initialize()
Definition: adaptmatch.cpp:93
void PrintAdaptiveMatchResults(const ADAPT_RESULTS &results)
#define NULL
Definition: host.h:144
TBOX bounding_box() const
Definition: blobs.cpp:482
bool classify_enable_adaptive_debugger
Definition: classify.h:414
ADAPT_TEMPLATES AdaptedTemplates
Definition: classify.h:473
bool tesseract::Classify::AdaptiveClassifierIsEmpty ( ) const
inline

Definition at line 285 of file classify.h.

285  {
286  return AdaptedTemplates->NumPermClasses == 0;
287  }
ADAPT_TEMPLATES AdaptedTemplates
Definition: classify.h:473
bool tesseract::Classify::AdaptiveClassifierIsFull ( ) const
inline

Definition at line 284 of file classify.h.

284 { return NumAdaptationsFailed > 0; }
void tesseract::Classify::AdaptToChar ( TBLOB * Blob,
CLASS_ID  ClassId,
int  FontinfoId,
FLOAT32  Threshold,
ADAPT_TEMPLATES  adaptive_templates 
)
Parameters
Blob: blob to add to templates for ClassId
ClassId: class to add blob to
FontinfoId: font information from pre-trained templates
Threshold: minimum match rating to existing template
adaptive_templates: current set of adapted templates

Globals:

  • AllProtosOn dummy mask to match against all protos
  • AllConfigsOn dummy mask to match against all configs
Returns
none
Note
Exceptions: none
History: Thu Mar 14 09:36:03 1991, DSJ, Created.

Definition at line 887 of file adaptmatch.cpp.

889  {
890  int NumFeatures;
891  INT_FEATURE_ARRAY IntFeatures;
892  UnicharRating int_result;
893  INT_CLASS IClass;
894  ADAPT_CLASS Class;
895  TEMP_CONFIG TempConfig;
896  FEATURE_SET FloatFeatures;
897  int NewTempConfigId;
898 
899  if (!LegalClassId (ClassId))
900  return;
901 
902  int_result.unichar_id = ClassId;
903  Class = adaptive_templates->Class[ClassId];
904  assert(Class != NULL);
905  if (IsEmptyAdaptedClass(Class)) {
906  InitAdaptedClass(Blob, ClassId, FontinfoId, Class, adaptive_templates);
907  } else {
908  IClass = ClassForClassId(adaptive_templates->Templates, ClassId);
909 
910  NumFeatures = GetAdaptiveFeatures(Blob, IntFeatures, &FloatFeatures);
911  if (NumFeatures <= 0)
912  return;
913 
914  // Only match configs with the matching font.
915  BIT_VECTOR MatchingFontConfigs = NewBitVector(MAX_NUM_PROTOS);
916  for (int cfg = 0; cfg < IClass->NumConfigs; ++cfg) {
917  if (GetFontinfoId(Class, cfg) == FontinfoId) {
918  SET_BIT(MatchingFontConfigs, cfg);
919  } else {
920  reset_bit(MatchingFontConfigs, cfg);
921  }
922  }
923  im_.Match(IClass, AllProtosOn, MatchingFontConfigs,
924  NumFeatures, IntFeatures,
927  FreeBitVector(MatchingFontConfigs);
928 
929  SetAdaptiveThreshold(Threshold);
930 
931  if (1.0f - int_result.rating <= Threshold) {
932  if (ConfigIsPermanent(Class, int_result.config)) {
934  tprintf("Found good match to perm config %d = %4.1f%%.\n",
935  int_result.config, int_result.rating * 100.0);
936  FreeFeatureSet(FloatFeatures);
937  return;
938  }
939 
940  TempConfig = TempConfigFor(Class, int_result.config);
941  IncreaseConfidence(TempConfig);
942  if (TempConfig->NumTimesSeen > Class->MaxNumTimesSeen) {
943  Class->MaxNumTimesSeen = TempConfig->NumTimesSeen;
944  }
946  tprintf("Increasing reliability of temp config %d to %d.\n",
947  int_result.config, TempConfig->NumTimesSeen);
948 
949  if (TempConfigReliable(ClassId, TempConfig)) {
950  MakePermanent(adaptive_templates, ClassId, int_result.config, Blob);
951  UpdateAmbigsGroup(ClassId, Blob);
952  }
953  } else {
955  tprintf("Found poor match to temp config %d = %4.1f%%.\n",
956  int_result.config, int_result.rating * 100.0);
958  DisplayAdaptedChar(Blob, IClass);
959  }
960  NewTempConfigId =
961  MakeNewTemporaryConfig(adaptive_templates, ClassId, FontinfoId,
962  NumFeatures, IntFeatures, FloatFeatures);
963  if (NewTempConfigId >= 0 &&
964  TempConfigReliable(ClassId, TempConfigFor(Class, NewTempConfigId))) {
965  MakePermanent(adaptive_templates, ClassId, NewTempConfigId, Blob);
966  UpdateAmbigsGroup(ClassId, Blob);
967  }
968 
969 #ifndef GRAPHICS_DISABLED
971  DisplayAdaptedChar(Blob, IClass);
972  }
973 #endif
974  }
975  FreeFeatureSet(FloatFeatures);
976  }
977 } /* AdaptToChar */
bool matcher_debug_separate_windows
Definition: classify.h:458
#define tprintf(...)
Definition: tprintf.h:31
void MakePermanent(ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int ConfigId, TBLOB *Blob)
#define IsEmptyAdaptedClass(Class)
Definition: adaptive.h:90
#define ConfigIsPermanent(Class, ConfigId)
Definition: adaptive.h:93
BIT_VECTOR AllProtosOn
Definition: classify.h:480
int classify_learning_debug_level
Definition: classify.h:419
BIT_VECTOR NewBitVector(int NumBits)
Definition: bitvec.cpp:90
ADAPT_CLASS Class[MAX_NUM_CLASSES]
Definition: adaptive.h:81
#define LegalClassId(c)
Definition: intproto.h:179
uinT8 MaxNumTimesSeen
Definition: adaptive.h:66
void Match(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, inT16 NumFeatures, const INT_FEATURE_STRUCT *Features, tesseract::UnicharRating *Result, int AdaptFeatureThreshold, int Debug, bool SeparateDebugWindows)
Definition: intmatcher.cpp:461
#define TempConfigFor(Class, ConfigId)
Definition: adaptive.h:102
INT_FEATURE_STRUCT INT_FEATURE_ARRAY[MAX_NUM_INT_FEATURES]
Definition: intproto.h:155
void UpdateAmbigsGroup(CLASS_ID class_id, TBLOB *Blob)
void SetAdaptiveThreshold(FLOAT32 Threshold)
#define NO_DEBUG
Definition: adaptmatch.cpp:70
uinT8 NumTimesSeen
Definition: adaptive.h:41
int GetFontinfoId(ADAPT_CLASS Class, uinT8 ConfigId)
Definition: adaptive.cpp:190
void DisplayAdaptedChar(TBLOB *blob, INT_CLASS_STRUCT *int_class)
Definition: adaptmatch.cpp:979
#define ClassForClassId(T, c)
Definition: intproto.h:181
INT_TEMPLATES Templates
Definition: adaptive.h:77
bool TempConfigReliable(CLASS_ID class_id, const TEMP_CONFIG &config)
uinT32 * BIT_VECTOR
Definition: bitvec.h:28
int GetAdaptiveFeatures(TBLOB *Blob, INT_FEATURE_ARRAY IntFeatures, FEATURE_SET *FloatFeatures)
Definition: adaptmatch.cpp:812
IntegerMatcher im_
Definition: classify.h:503
void InitAdaptedClass(TBLOB *Blob, CLASS_ID ClassId, int FontinfoId, ADAPT_CLASS Class, ADAPT_TEMPLATES Templates)
Definition: adaptmatch.cpp:717
void FreeBitVector(BIT_VECTOR BitVector)
Definition: bitvec.cpp:55
#define IncreaseConfidence(TempConfig)
Definition: adaptive.h:108
uinT8 NumConfigs
Definition: intproto.h:110
#define SET_BIT(array, bit)
Definition: bitvec.h:57
#define NULL
Definition: host.h:144
int MakeNewTemporaryConfig(ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int FontinfoId, int NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_SET FloatFeatures)
#define MAX_NUM_PROTOS
Definition: intproto.h:47
void FreeFeatureSet(FEATURE_SET FeatureSet)
Definition: ocrfeatures.cpp:79
#define reset_bit(array, bit)
Definition: bitvec.h:59
int classify_adapt_feature_threshold
Definition: classify.h:447
void tesseract::Classify::AddLargeSpeckleTo ( int  blob_length,
BLOB_CHOICE_LIST *  choices 
)

Definition at line 212 of file classify.cpp.

212  {
213  BLOB_CHOICE_IT bc_it(choices);
214  // If there is no classifier result, we will use the worst possible certainty
215  // and corresponding rating.
216  float certainty = -getDict().certainty_scale;
217  float rating = rating_scale * blob_length;
218  if (!choices->empty() && blob_length > 0) {
219  bc_it.move_to_last();
220  BLOB_CHOICE* worst_choice = bc_it.data();
221  // Add speckle_rating_penalty to worst rating, matching old value.
222  rating = worst_choice->rating() + speckle_rating_penalty;
223  // Compute the rating to correspond to the certainty. (Used to be kept
224  // the same, but that messes up the language model search.)
225  certainty = -rating * getDict().certainty_scale /
226  (rating_scale * blob_length);
227  }
228  BLOB_CHOICE* blob_choice = new BLOB_CHOICE(UNICHAR_SPACE, rating, certainty,
229  -1, 0.0f, MAX_FLOAT32, 0,
231  bc_it.add_to_end(blob_choice);
232 }
double speckle_rating_penalty
Definition: classify.h:503
float rating() const
Definition: ratngs.h:79
Dict & getDict()
Definition: classify.h:65
#define MAX_FLOAT32
Definition: host.h:124
double certainty_scale
Definition: dict.h:601
void tesseract::Classify::AddNewResult ( const UnicharRating new_result,
ADAPT_RESULTS results 
)

This routine adds the result of a classification into Results. If the new rating is much worse than the current best rating, it is not entered into results because it would end up being stripped later anyway. If the new rating is better than the old rating for the class, it replaces the old rating. If this is the first rating for the class, the class is added to the list of matched classes in Results. If the new rating is better than the best so far, it becomes the best so far.

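Globals:

  • matcher_bad_match_pad: how far below the current best rating a new rating may fall and still be accepted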

Parameters
[out] results: results to add new result to
class_id: class of new result
shape_id: shape index
rating: rating of new result
adapted: adapted match or not
config: config id of new result
fontinfo_id: font information of the new result
fontinfo_id2: font information of the 2nd choice result
Note
Exceptions: none
History: Tue Mar 12 18:19:29 1991, DSJ, Created.

Definition at line 1037 of file adaptmatch.cpp.

1038  {
1039  int old_match = FindScoredUnichar(new_result.unichar_id, *results);
1040 
1041  if (new_result.rating + matcher_bad_match_pad < results->best_rating ||
1042  (old_match < results->match.size() &&
1043  new_result.rating <= results->match[old_match].rating))
1044  return; // New one not good enough.
1045 
1046  if (!unicharset.get_fragment(new_result.unichar_id))
1047  results->HasNonfragment = true;
1048 
1049  if (old_match < results->match.size()) {
1050  results->match[old_match].rating = new_result.rating;
1051  } else {
1052  results->match.push_back(new_result);
1053  }
1054 
1055  if (new_result.rating > results->best_rating &&
1056  // Ensure that fragments do not affect best rating, class and config.
1057  // This is needed so that at least one non-fragmented character is
1058  // always present in the results.
1059  // TODO(daria): verify that this helps accuracy and does not
1060  // hurt performance.
1061  !unicharset.get_fragment(new_result.unichar_id)) {
1062  results->best_match_index = old_match;
1063  results->best_rating = new_result.rating;
1064  results->best_unichar_id = new_result.unichar_id;
1065  }
1066 } /* AddNewResult */
int push_back(T object)
GenericVector< UnicharRating > match
Definition: adaptmatch.cpp:88
UNICHARSET unicharset
Definition: ccutil.h:72
UNICHAR_ID best_unichar_id
Definition: adaptmatch.cpp:85
int best_match_index
Definition: adaptmatch.cpp:86
const CHAR_FRAGMENT * get_fragment(UNICHAR_ID unichar_id) const
Definition: unicharset.h:682
bool HasNonfragment
Definition: adaptmatch.cpp:84
FLOAT32 best_rating
Definition: adaptmatch.cpp:87
void tesseract::Classify::AmbigClassifier ( const GenericVector< INT_FEATURE_STRUCT > &  int_features,
const INT_FX_RESULT_STRUCT fx_info,
const TBLOB blob,
INT_TEMPLATES  templates,
ADAPT_CLASS classes,
UNICHAR_ID ambiguities,
ADAPT_RESULTS results 
)

This routine is identical to CharNormClassifier() except that it does no class pruning. It simply matches the unknown blob against the classes listed in Ambiguities.

Globals:

  • AllProtosOn: mask that enables all protos
  • AllConfigsOn: mask that enables all configs

Parameters
Blob: blob to be classified
Templates: built-in templates to classify against
Classes: adapted class templates
Ambiguities: array of class ids to match against
[out] Results: place to put match results
Note
Exceptions: none
History: Tue Mar 12 19:40:36 1991, DSJ, Created.

Definition at line 1089 of file adaptmatch.cpp.

1096  {
1097  if (int_features.empty()) return;
1098  uinT8* CharNormArray = new uinT8[unicharset.size()];
1099  UnicharRating int_result;
1100 
1101  results->BlobLength = GetCharNormFeature(fx_info, templates, NULL,
1102  CharNormArray);
1103  bool debug = matcher_debug_level >= 2 || classify_debug_level > 1;
1104  if (debug)
1105  tprintf("AM Matches = ");
1106 
1107  int top = blob->bounding_box().top();
1108  int bottom = blob->bounding_box().bottom();
1109  while (*ambiguities >= 0) {
1110  CLASS_ID class_id = *ambiguities;
1111 
1112  int_result.unichar_id = class_id;
1113  im_.Match(ClassForClassId(templates, class_id),
1115  int_features.size(), &int_features[0],
1116  &int_result,
1119 
1120  ExpandShapesAndApplyCorrections(NULL, debug, class_id, bottom, top, 0,
1121  results->BlobLength,
1123  CharNormArray, &int_result, results);
1124  ambiguities++;
1125  }
1126  delete [] CharNormArray;
1127 } /* AmbigClassifier */
bool matcher_debug_separate_windows
Definition: classify.h:458
int size() const
Definition: genericvector.h:72
int classify_integer_matcher_multiplier
Definition: classify.h:469
inT32 BlobLength
Definition: adaptmatch.cpp:83
#define tprintf(...)
Definition: tprintf.h:31
UNICHARSET unicharset
Definition: ccutil.h:72
UNICHAR_ID CLASS_ID
Definition: matchdefs.h:35
BIT_VECTOR AllProtosOn
Definition: classify.h:480
int GetCharNormFeature(const INT_FX_RESULT_STRUCT &fx_info, INT_TEMPLATES templates, uinT8 *pruner_norm_array, uinT8 *char_norm_array)
void Match(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, inT16 NumFeatures, const INT_FEATURE_STRUCT *Features, tesseract::UnicharRating *Result, int AdaptFeatureThreshold, int Debug, bool SeparateDebugWindows)
Definition: intmatcher.cpp:461
#define NO_DEBUG
Definition: adaptmatch.cpp:70
void ExpandShapesAndApplyCorrections(ADAPT_CLASS *classes, bool debug, int class_id, int bottom, int top, float cp_rating, int blob_length, int matcher_multiplier, const uinT8 *cn_factors, UnicharRating *int_result, ADAPT_RESULTS *final_results)
inT16 bottom() const
Definition: rect.h:61
#define ClassForClassId(T, c)
Definition: intproto.h:181
bool empty() const
Definition: genericvector.h:84
IntegerMatcher im_
Definition: classify.h:503
#define NULL
Definition: host.h:144
TBOX bounding_box() const
Definition: blobs.cpp:482
int size() const
Definition: unicharset.h:297
BIT_VECTOR AllConfigsOn
Definition: classify.h:481
inT16 top() const
Definition: rect.h:54
int classify_adapt_feature_threshold
Definition: classify.h:447
unsigned char uinT8
Definition: host.h:99
UNICHAR_ID * tesseract::Classify::BaselineClassifier ( TBLOB Blob,
const GenericVector< INT_FEATURE_STRUCT > &  int_features,
const INT_FX_RESULT_STRUCT fx_info,
ADAPT_TEMPLATES  Templates,
ADAPT_RESULTS Results 
)

This routine extracts baseline normalized features from the unknown character and matches them against the specified set of templates. The classes which match are added to Results.

Globals:

  • BaselineCutoffs expected num features for each class
Parameters
Blob: blob to be classified
Templates: current set of adapted templates
Results: place to put match results
Returns
Array of possible ambiguous chars that should be checked.
Note
Exceptions: none
History: Tue Mar 12 19:38:03 1991, DSJ, Created.

Definition at line 1309 of file adaptmatch.cpp.

1312  {
1313  if (int_features.empty()) return NULL;
1314  uinT8* CharNormArray = new uinT8[unicharset.size()];
1315  ClearCharNormArray(CharNormArray);
1316 
1318  PruneClasses(Templates->Templates, int_features.size(), -1, &int_features[0],
1319  CharNormArray, BaselineCutoffs, &Results->CPResults);
1320 
1321  if (matcher_debug_level >= 2 || classify_debug_level > 1)
1322  tprintf("BL Matches = ");
1323 
1324  MasterMatcher(Templates->Templates, int_features.size(), &int_features[0],
1325  CharNormArray,
1326  Templates->Class, matcher_debug_flags, 0,
1327  Blob->bounding_box(), Results->CPResults, Results);
1328 
1329  delete [] CharNormArray;
1330  CLASS_ID ClassId = Results->best_unichar_id;
1331  if (ClassId == INVALID_UNICHAR_ID || Results->best_match_index < 0)
1332  return NULL;
1333 
1334  return Templates->Class[ClassId]->
1335  Config[Results->match[Results->best_match_index].config].Perm->Ambigs;
1336 } /* BaselineClassifier */
void ClearCharNormArray(uinT8 *char_norm_array)
Definition: float2int.cpp:48
int size() const
Definition: genericvector.h:72
inT32 BlobLength
Definition: adaptmatch.cpp:83
GenericVector< CP_RESULT_STRUCT > CPResults
Definition: adaptmatch.cpp:89
GenericVector< UnicharRating > match
Definition: adaptmatch.cpp:88
#define tprintf(...)
Definition: tprintf.h:31
UNICHARSET unicharset
Definition: ccutil.h:72
UNICHAR_ID CLASS_ID
Definition: matchdefs.h:35
CLUSTERCONFIG Config
UNICHAR_ID best_unichar_id
Definition: adaptmatch.cpp:85
int best_match_index
Definition: adaptmatch.cpp:86
ADAPT_CLASS Class[MAX_NUM_CLASSES]
Definition: adaptive.h:81
INT_TEMPLATES Templates
Definition: adaptive.h:77
bool empty() const
Definition: genericvector.h:84
int PruneClasses(const INT_TEMPLATES_STRUCT *int_templates, int num_features, int keep_this, const INT_FEATURE_STRUCT *features, const uinT8 *normalization_factors, const uinT16 *expected_num_features, GenericVector< CP_RESULT_STRUCT > *results)
Definition: intmatcher.cpp:407
int IntCastRounded(double x)
Definition: helpers.h:172
void MasterMatcher(INT_TEMPLATES templates, inT16 num_features, const INT_FEATURE_STRUCT *features, const uinT8 *norm_factors, ADAPT_CLASS *classes, int debug, int matcher_multiplier, const TBOX &blob_box, const GenericVector< CP_RESULT_STRUCT > &results, ADAPT_RESULTS *final_results)
#define NULL
Definition: host.h:144
TBOX bounding_box() const
Definition: blobs.cpp:482
int size() const
Definition: unicharset.h:297
const double kStandardFeatureLength
Definition: intfx.h:46
unsigned char uinT8
Definition: host.h:99
int tesseract::Classify::CharNormClassifier ( TBLOB blob,
const TrainingSample sample,
ADAPT_RESULTS adapt_results 
)

This routine extracts character normalized features from the unknown character and matches them against the specified set of templates. The classes which match are added to Results.

Parameters
Blob: blob to be classified
Templates: templates to classify unknown against
Results: place to put match results

Globals:

  • CharNormCutoffs expected num features for each class
  • AllProtosOn mask that enables all protos
  • AllConfigsOn mask that enables all configs
Note
Exceptions: none
History: Tue Mar 12 16:02:52 1991, DSJ, Created.

Definition at line 1358 of file adaptmatch.cpp.

1360  {
1361  // This is the length that is used for scaling ratings vs certainty.
1362  adapt_results->BlobLength =
1363  IntCastRounded(sample.outline_length() / kStandardFeatureLength);
1364  GenericVector<UnicharRating> unichar_results;
1365  static_classifier_->UnicharClassifySample(sample, blob->denorm().pix(), 0,
1366  -1, &unichar_results);
1367  // Convert results to the format used internally by AdaptiveClassifier.
1368  for (int r = 0; r < unichar_results.size(); ++r) {
1369  AddNewResult(unichar_results[r], adapt_results);
1370  }
1371  return sample.num_features();
1372 } /* CharNormClassifier */
virtual int UnicharClassifySample(const TrainingSample &sample, Pix *page_pix, int debug, UNICHAR_ID keep_this, GenericVector< UnicharRating > *results)
void AddNewResult(const UnicharRating &new_result, ADAPT_RESULTS *results)
inT32 BlobLength
Definition: adaptmatch.cpp:83
Pix * pix() const
Definition: normalis.h:248
const DENORM & denorm() const
Definition: blobs.h:340
Definition: cluster.h:32
int IntCastRounded(double x)
Definition: helpers.h:172
const double kStandardFeatureLength
Definition: intfx.h:46
int tesseract::Classify::CharNormTrainingSample ( bool  pruner_only,
int  keep_this,
const TrainingSample sample,
GenericVector< UnicharRating > *  results 
)

Definition at line 1376 of file adaptmatch.cpp.

1379  {
1380  results->clear();
1381  ADAPT_RESULTS* adapt_results = new ADAPT_RESULTS();
1382  adapt_results->Initialize();
1383  // Compute the bounding box of the features.
1384  int num_features = sample.num_features();
1385  // Only the top and bottom of the blob_box are used by MasterMatcher, so
1386  // fabricate right and left using top and bottom.
1387  TBOX blob_box(sample.geo_feature(GeoBottom), sample.geo_feature(GeoBottom),
1388  sample.geo_feature(GeoTop), sample.geo_feature(GeoTop));
1389  // Compute the char_norm_array from the saved cn_feature.
1390  FEATURE norm_feature = sample.GetCNFeature();
1391  uinT8* char_norm_array = new uinT8[unicharset.size()];
1392  int num_pruner_classes = MAX(unicharset.size(),
1394  uinT8* pruner_norm_array = new uinT8[num_pruner_classes];
1395  adapt_results->BlobLength =
1396  static_cast<int>(ActualOutlineLength(norm_feature) * 20 + 0.5);
1397  ComputeCharNormArrays(norm_feature, PreTrainedTemplates, char_norm_array,
1398  pruner_norm_array);
1399 
1400  PruneClasses(PreTrainedTemplates, num_features, keep_this, sample.features(),
1401  pruner_norm_array,
1402  shape_table_ != NULL ? &shapetable_cutoffs_[0] : CharNormCutoffs,
1403  &adapt_results->CPResults);
1404  delete [] pruner_norm_array;
1405  if (keep_this >= 0) {
1406  adapt_results->CPResults[0].Class = keep_this;
1407  adapt_results->CPResults.truncate(1);
1408  }
1409  if (pruner_only) {
1410  // Convert pruner results to output format.
1411  for (int i = 0; i < adapt_results->CPResults.size(); ++i) {
1412  int class_id = adapt_results->CPResults[i].Class;
1413  results->push_back(
1414  UnicharRating(class_id, 1.0f - adapt_results->CPResults[i].Rating));
1415  }
1416  } else {
1417  MasterMatcher(PreTrainedTemplates, num_features, sample.features(),
1418  char_norm_array,
1421  blob_box, adapt_results->CPResults, adapt_results);
1422  // Convert master matcher results to output format.
1423  for (int i = 0; i < adapt_results->match.size(); i++) {
1424  results->push_back(adapt_results->match[i]);
1425  }
1427  }
1428  delete [] char_norm_array;
1429  delete adapt_results;
1430  return num_features;
1431 } /* CharNormTrainingSample */
int size() const
Definition: genericvector.h:72
void truncate(int size)
int classify_integer_matcher_multiplier
Definition: classify.h:469
#define MAX(x, y)
Definition: ndminx.h:24
int push_back(T object)
inT32 BlobLength
Definition: adaptmatch.cpp:83
GenericVector< CP_RESULT_STRUCT > CPResults
Definition: adaptmatch.cpp:89
GenericVector< UnicharRating > match
Definition: adaptmatch.cpp:88
void ComputeCharNormArrays(FEATURE_STRUCT *norm_feature, INT_TEMPLATES_STRUCT *templates, uinT8 *char_norm_array, uinT8 *pruner_array)
UNICHARSET unicharset
Definition: ccutil.h:72
INT_TEMPLATES PreTrainedTemplates
Definition: classify.h:469
FLOAT32 ActualOutlineLength(FEATURE Feature)
Definition: normfeat.cpp:32
ShapeTable * shape_table_
Definition: classify.h:512
static int SortDescendingRating(const void *t1, const void *t2)
Definition: shapetable.h:56
int PruneClasses(const INT_TEMPLATES_STRUCT *int_templates, int num_features, int keep_this, const INT_FEATURE_STRUCT *features, const uinT8 *normalization_factors, const uinT16 *expected_num_features, GenericVector< CP_RESULT_STRUCT > *results)
Definition: intmatcher.cpp:407
void Initialize()
Definition: adaptmatch.cpp:93
Definition: cluster.h:32
Definition: rect.h:30
void MasterMatcher(INT_TEMPLATES templates, inT16 num_features, const INT_FEATURE_STRUCT *features, const uinT8 *norm_factors, ADAPT_CLASS *classes, int debug, int matcher_multiplier, const TBOX &blob_box, const GenericVector< CP_RESULT_STRUCT > &results, ADAPT_RESULTS *final_results)
#define NULL
Definition: host.h:144
int size() const
Definition: unicharset.h:297
unsigned char uinT8
Definition: host.h:99
int tesseract::Classify::ClassAndConfigIDToFontOrShapeID ( int  class_id,
int  int_result_config 
) const

Definition at line 2290 of file adaptmatch.cpp.

2291  {
2292  int font_set_id = PreTrainedTemplates->Class[class_id]->font_set_id;
2293  // Older inttemps have no font_ids.
2294  if (font_set_id < 0)
2295  return kBlankFontinfoId;
2296  const FontSet &fs = fontset_table_.get(font_set_id);
2297  ASSERT_HOST(int_result_config >= 0 && int_result_config < fs.size);
2298  return fs.configs[int_result_config];
2299 }
INT_CLASS Class[MAX_NUM_CLASSES]
Definition: intproto.h:124
INT_TEMPLATES PreTrainedTemplates
Definition: classify.h:469
#define ASSERT_HOST(x)
Definition: errcode.h:84
UnicityTable< FontSet > fontset_table_
Definition: classify.h:496
STRING tesseract::Classify::ClassIDToDebugStr ( const INT_TEMPLATES_STRUCT templates,
int  class_id,
int  config_id 
) const

Definition at line 2277 of file adaptmatch.cpp.

2278  {
2279  STRING class_string;
2280  if (templates == PreTrainedTemplates && shape_table_ != NULL) {
2281  int shape_id = ClassAndConfigIDToFontOrShapeID(class_id, config_id);
2282  class_string = shape_table_->DebugStr(shape_id);
2283  } else {
2284  class_string = unicharset.debug_str(class_id);
2285  }
2286  return class_string;
2287 }
int ClassAndConfigIDToFontOrShapeID(int class_id, int int_result_config) const
STRING debug_str(UNICHAR_ID id) const
Definition: unicharset.cpp:318
UNICHARSET unicharset
Definition: ccutil.h:72
INT_TEMPLATES PreTrainedTemplates
Definition: classify.h:469
ShapeTable * shape_table_
Definition: classify.h:512
STRING DebugStr(int shape_id) const
Definition: shapetable.cpp:291
Definition: strngs.h:44
#define NULL
Definition: host.h:144
void tesseract::Classify::ClassifyAsNoise ( ADAPT_RESULTS results)

This routine computes a rating which reflects the likelihood that the blob being classified is a noise blob. NOTE: assumes that the blob length has already been computed and placed into Results.

Parameters
Results: results to add noise classification to

Globals:

  • matcher_avg_noise_size avg. length of a noise blob
Note
Exceptions: none
History: Tue Mar 12 18:36:52 1991, DSJ, Created.

Definition at line 1449 of file adaptmatch.cpp.

1449  {
1450  float rating = results->BlobLength / matcher_avg_noise_size;
1451  rating *= rating;
1452  rating /= 1.0 + rating;
1453 
1454  AddNewResult(UnicharRating(UNICHAR_SPACE, 1.0f - rating), results);
1455 } /* ClassifyAsNoise */
void AddNewResult(const UnicharRating &new_result, ADAPT_RESULTS *results)
inT32 BlobLength
Definition: adaptmatch.cpp:83
double matcher_avg_noise_size
Definition: classify.h:425
void tesseract::Classify::ClearCharNormArray ( uinT8 char_norm_array)

For each class in the unicharset, clears the corresponding entry in char_norm_array. char_norm_array is indexed by unichar_id.

Globals:

  • none
Parameters
char_norm_array: array to be cleared
Note
Exceptions: none
History: Wed Feb 20 11:20:54 1991, DSJ, Created.

Definition at line 48 of file float2int.cpp.

48  {
49  memset(char_norm_array, 0, sizeof(*char_norm_array) * unicharset.size());
50 } /* ClearCharNormArray */
UNICHARSET unicharset
Definition: ccutil.h:72
int size() const
Definition: unicharset.h:297
void tesseract::Classify::ComputeCharNormArrays ( FEATURE_STRUCT norm_feature,
INT_TEMPLATES_STRUCT templates,
uinT8 char_norm_array,
uinT8 pruner_array 
)

Definition at line 1753 of file adaptmatch.cpp.

1756  {
1757  ComputeIntCharNormArray(*norm_feature, char_norm_array);
1758  if (pruner_array != NULL) {
1759  if (shape_table_ == NULL) {
1760  ComputeIntCharNormArray(*norm_feature, pruner_array);
1761  } else {
1762  memset(pruner_array, MAX_UINT8,
1763  templates->NumClasses * sizeof(pruner_array[0]));
1764  // Each entry in the pruner norm array is the MIN of all the entries of
1765  // the corresponding unichars in the CharNormArray.
1766  for (int id = 0; id < templates->NumClasses; ++id) {
1767  int font_set_id = templates->Class[id]->font_set_id;
1768  const FontSet &fs = fontset_table_.get(font_set_id);
1769  for (int config = 0; config < fs.size; ++config) {
1770  const Shape& shape = shape_table_->GetShape(fs.configs[config]);
1771  for (int c = 0; c < shape.size(); ++c) {
1772  if (char_norm_array[shape[c].unichar_id] < pruner_array[id])
1773  pruner_array[id] = char_norm_array[shape[c].unichar_id];
1774  }
1775  }
1776  }
1777  }
1778  }
1779  FreeFeature(norm_feature);
1780 }
INT_CLASS Class[MAX_NUM_CLASSES]
Definition: intproto.h:124
void ComputeIntCharNormArray(const FEATURE_STRUCT &norm_feature, uinT8 *char_norm_array)
Definition: float2int.cpp:69
ShapeTable * shape_table_
Definition: classify.h:512
#define MAX_UINT8
Definition: host.h:121
void FreeFeature(FEATURE Feature)
Definition: ocrfeatures.cpp:59
UnicityTable< FontSet > fontset_table_
Definition: classify.h:496
#define NULL
Definition: host.h:144
const Shape & GetShape(int shape_id) const
Definition: shapetable.h:323
double tesseract::Classify::ComputeCorrectedRating ( bool  debug,
int  unichar_id,
double  cp_rating,
double  im_rating,
int  feature_misses,
int  bottom,
int  top,
int  blob_length,
int  matcher_multiplier,
const uinT8 cn_factors 
)

Definition at line 1246 of file adaptmatch.cpp.

1251  {
1252  // Compute class feature corrections.
1253  double cn_corrected = im_.ApplyCNCorrection(1.0 - im_rating, blob_length,
1254  cn_factors[unichar_id],
1255  matcher_multiplier);
1256  double miss_penalty = tessedit_class_miss_scale * feature_misses;
1257  double vertical_penalty = 0.0;
1258  // Penalize non-alnums for being vertical misfits.
1259  if (!unicharset.get_isalpha(unichar_id) &&
1260  !unicharset.get_isdigit(unichar_id) &&
1261  cn_factors[unichar_id] != 0 && classify_misfit_junk_penalty > 0.0) {
1262  int min_bottom, max_bottom, min_top, max_top;
1263  unicharset.get_top_bottom(unichar_id, &min_bottom, &max_bottom,
1264  &min_top, &max_top);
1265  if (debug) {
1266  tprintf("top=%d, vs [%d, %d], bottom=%d, vs [%d, %d]\n",
1267  top, min_top, max_top, bottom, min_bottom, max_bottom);
1268  }
1269  if (top < min_top || top > max_top ||
1270  bottom < min_bottom || bottom > max_bottom) {
1271  vertical_penalty = classify_misfit_junk_penalty;
1272  }
1273  }
1274  double result = 1.0 - (cn_corrected + miss_penalty + vertical_penalty);
1275  if (result < WORST_POSSIBLE_RATING)
1276  result = WORST_POSSIBLE_RATING;
1277  if (debug) {
1278  tprintf("%s: %2.1f%%(CP%2.1f, IM%2.1f + CN%.2f(%d) + MP%2.1f + VP%2.1f)\n",
1279  unicharset.id_to_unichar(unichar_id),
1280  result * 100.0,
1281  cp_rating * 100.0,
1282  (1.0 - im_rating) * 100.0,
1283  (cn_corrected - (1.0 - im_rating)) * 100.0,
1284  cn_factors[unichar_id],
1285  miss_penalty * 100.0,
1286  vertical_penalty * 100.0);
1287  }
1288  return result;
1289 }
float ApplyCNCorrection(float rating, int blob_length, int normalization_factor, int matcher_multiplier)
#define tprintf(...)
Definition: tprintf.h:31
double tessedit_class_miss_scale
Definition: classify.h:439
UNICHARSET unicharset
Definition: ccutil.h:72
bool get_isdigit(UNICHAR_ID unichar_id) const
Definition: unicharset.h:470
const char *const id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:266
void get_top_bottom(UNICHAR_ID unichar_id, int *min_bottom, int *max_bottom, int *min_top, int *max_top) const
Definition: unicharset.h:526
bool get_isalpha(UNICHAR_ID unichar_id) const
Definition: unicharset.h:449
IntegerMatcher im_
Definition: classify.h:503
#define WORST_POSSIBLE_RATING
Definition: adaptmatch.cpp:77
double classify_misfit_junk_penalty
Definition: classify.h:435
void tesseract::Classify::ComputeIntCharNormArray ( const FEATURE_STRUCT norm_feature,
uinT8 char_norm_array 
)

For each class in unicharset, computes the match between norm_feature and the normalization protos for that class. Converts this number to the range 0 to 255 and stores it into char_norm_array, which is indexed by unichar_id.

Globals:

  • PreTrainedTemplates current set of built-in templates
Parameters
norm_feature: character normalization feature
[out] char_norm_array: place to put results, of size unicharset.size()
Note
Exceptions: none
History: Wed Feb 20 11:20:54 1991, DSJ, Created.

Definition at line 69 of file float2int.cpp.

70  {
71  for (int i = 0; i < unicharset.size(); i++) {
72  if (i < PreTrainedTemplates->NumClasses) {
73  int norm_adjust = static_cast<int>(INT_CHAR_NORM_RANGE *
74  ComputeNormMatch(i, norm_feature, FALSE));
75  char_norm_array[i] = ClipToRange(norm_adjust, 0, MAX_INT_CHAR_NORM);
76  } else {
77  // Classes with no templates (eg. ambigs & ligatures) default
78  // to worst match.
79  char_norm_array[i] = MAX_INT_CHAR_NORM;
80  }
81  }
82 } /* ComputeIntCharNormArray */
#define INT_CHAR_NORM_RANGE
Definition: intproto.h:133
UNICHARSET unicharset
Definition: ccutil.h:72
T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound)
Definition: helpers.h:115
#define MAX_INT_CHAR_NORM
Definition: float2int.cpp:28
FLOAT32 ComputeNormMatch(CLASS_ID ClassId, const FEATURE_STRUCT &feature, BOOL8 DebugMatch)
Definition: normmatch.cpp:73
#define FALSE
Definition: capi.h:29
int size() const
Definition: unicharset.h:297
void tesseract::Classify::ComputeIntFeatures ( FEATURE_SET  Features,
INT_FEATURE_ARRAY  IntFeatures 
)

This routine converts each floating point pico-feature in Features into integer format and saves it into IntFeatures.

Globals:

  • none
Parameters
Features: floating point pico-features to be converted
[out] IntFeatures: array to put converted features into
Note
Exceptions: none
History: Wed Feb 20 10:58:45 1991, DSJ, Created.

Definition at line 100 of file float2int.cpp.

101  {
102  int Fid;
103  FEATURE Feature;
104  FLOAT32 YShift;
105 
107  YShift = BASELINE_Y_SHIFT;
108  else
109  YShift = Y_SHIFT;
110 
111  for (Fid = 0; Fid < Features->NumFeatures; Fid++) {
112  Feature = Features->Features[Fid];
113 
114  IntFeatures[Fid].X =
116  IntFeatures[Fid].Y =
117  Bucket8For(Feature->Params[PicoFeatY], YShift, INT_FEAT_RANGE);
118  IntFeatures[Fid].Theta = CircBucketFor(Feature->Params[PicoFeatDir],
120  IntFeatures[Fid].CP_misses = 0;
121  }
122 } /* ComputeIntFeatures */
#define X_SHIFT
Definition: intproto.h:40
#define ANGLE_SHIFT
Definition: intproto.h:39
float FLOAT32
Definition: host.h:111
uinT8 Bucket8For(FLOAT32 param, FLOAT32 offset, int num_buckets)
Definition: intproto.cpp:445
#define Y_SHIFT
Definition: intproto.h:41
FEATURE Features[1]
Definition: ocrfeatures.h:72
uinT8 CircBucketFor(FLOAT32 param, FLOAT32 offset, int num_buckets)
Definition: intproto.cpp:458
FLOAT32 Params[1]
Definition: ocrfeatures.h:65
#define INT_FEAT_RANGE
Definition: float2int.h:27
#define BASELINE_Y_SHIFT
Definition: float2int.h:28
FLOAT32 tesseract::Classify::ComputeNormMatch ( CLASS_ID  ClassId,
const FEATURE_STRUCT feature,
BOOL8  DebugMatch 
)

Definition at line 73 of file normmatch.cpp.

75  {
76 /*
77  ** Parameters:
78  ** ClassId id of class to match against
79  ** Feature character normalization feature
80  ** DebugMatch controls dump of debug info
81  ** Globals:
82  ** NormProtos character normalization prototypes
83  ** Operation: This routine compares Features against each character
84  ** normalization proto for ClassId and returns the match
85  ** rating of the best match.
86  ** Return: Best match rating for Feature against protos of ClassId.
87  ** Exceptions: none
88  ** History: Wed Dec 19 16:56:12 1990, DSJ, Created.
89  */
90  LIST Protos;
91  FLOAT32 BestMatch;
92  FLOAT32 Match;
93  FLOAT32 Delta;
94  PROTOTYPE *Proto;
95  int ProtoId;
96 
97  if (ClassId >= NormProtos->NumProtos) {
98  ClassId = NO_CLASS;
99  }
100 
101  /* handle requests for classification as noise */
102  if (ClassId == NO_CLASS) {
103  /* kludge - clean up constants and make into control knobs later */
104  Match = (feature.Params[CharNormLength] *
105  feature.Params[CharNormLength] * 500.0 +
106  feature.Params[CharNormRx] *
107  feature.Params[CharNormRx] * 8000.0 +
108  feature.Params[CharNormRy] *
109  feature.Params[CharNormRy] * 8000.0);
110  return (1.0 - NormEvidenceOf (Match));
111  }
112 
113  BestMatch = MAX_FLOAT32;
114  Protos = NormProtos->Protos[ClassId];
115 
116  if (DebugMatch) {
117  tprintf("\nChar norm for class %s\n", unicharset.id_to_unichar(ClassId));
118  }
119 
120  ProtoId = 0;
121  iterate(Protos) {
122  Proto = (PROTOTYPE *) first_node (Protos);
123  Delta = feature.Params[CharNormY] - Proto->Mean[CharNormY];
124  Match = Delta * Delta * Proto->Weight.Elliptical[CharNormY];
125  if (DebugMatch) {
126  tprintf("YMiddle: Proto=%g, Delta=%g, Var=%g, Dist=%g\n",
127  Proto->Mean[CharNormY], Delta,
128  Proto->Weight.Elliptical[CharNormY], Match);
129  }
130  Delta = feature.Params[CharNormRx] - Proto->Mean[CharNormRx];
131  Match += Delta * Delta * Proto->Weight.Elliptical[CharNormRx];
132  if (DebugMatch) {
133  tprintf("Height: Proto=%g, Delta=%g, Var=%g, Dist=%g\n",
134  Proto->Mean[CharNormRx], Delta,
135  Proto->Weight.Elliptical[CharNormRx], Match);
136  }
137  // Ry is width! See intfx.cpp.
138  Delta = feature.Params[CharNormRy] - Proto->Mean[CharNormRy];
139  if (DebugMatch) {
140  tprintf("Width: Proto=%g, Delta=%g, Var=%g\n",
141  Proto->Mean[CharNormRy], Delta,
142  Proto->Weight.Elliptical[CharNormRy]);
143  }
144  Delta = Delta * Delta * Proto->Weight.Elliptical[CharNormRy];
145  Delta *= kWidthErrorWeighting;
146  Match += Delta;
147  if (DebugMatch) {
148  tprintf("Total Dist=%g, scaled=%g, sigmoid=%g, penalty=%g\n",
149  Match, Match / classify_norm_adj_midpoint,
150  NormEvidenceOf(Match), 256 * (1 - NormEvidenceOf(Match)));
151  }
152 
153  if (Match < BestMatch)
154  BestMatch = Match;
155 
156  ProtoId++;
157  }
158  return 1.0 - NormEvidenceOf(BestMatch);
159 } /* ComputeNormMatch */
#define first_node(l)
Definition: oldlist.h:139
float FLOAT32
Definition: host.h:111
#define tprintf(...)
Definition: tprintf.h:31
UNICHARSET unicharset
Definition: ccutil.h:72
double classify_norm_adj_midpoint
Definition: normmatch.cpp:63
#define iterate(l)
Definition: oldlist.h:159
FLOAT32 * Mean
Definition: cluster.h:78
LIST * Protos
Definition: normmatch.cpp:42
FLOATUNION Weight
Definition: cluster.h:83
const char *const id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:266
FLOAT32 * Elliptical
Definition: cluster.h:64
FLOAT32 Params[1]
Definition: ocrfeatures.h:65
#define NO_CLASS
Definition: matchdefs.h:36
double NormEvidenceOf(register double NormAdj)
Definition: normmatch.cpp:183
#define MAX_FLOAT32
Definition: host.h:124
const double kWidthErrorWeighting
Definition: normmatch.cpp:66
NORM_PROTOS * NormProtos
Definition: classify.h:486
void tesseract::Classify::ConvertMatchesToChoices ( const DENORM & denorm,
const TBOX & box,
ADAPT_RESULTS * Results,
BLOB_CHOICE_LIST *  Choices 
)

The function converts the given match ratings to the list of blob choices with ratings and certainties (used by the context checkers). If character fragments are present in the results, this function also makes sure that there is at least one non-fragmented classification included. For each classification result check the unicharset for "definite" ambiguities and modify the resulting Choices accordingly.

Definition at line 1463 of file adaptmatch.cpp.

1465  {
1466  assert(Choices != NULL);
1467  FLOAT32 Rating;
1468  FLOAT32 Certainty;
1469  BLOB_CHOICE_IT temp_it;
1470  bool contains_nonfrag = false;
1471  temp_it.set_to_list(Choices);
1472  int choices_length = 0;
1473  // With no shape_table_ maintain the previous MAX_MATCHES as the maximum
1474  // number of returned results, but with a shape_table_ we want to have room
1475  // for at least the biggest shape (which might contain hundreds of Indic
1476  // grapheme fragments) and more, so use double the size of the biggest shape
1477  // if that is more than the default.
1478  int max_matches = MAX_MATCHES;
1479  if (shape_table_ != NULL) {
1480  max_matches = shape_table_->MaxNumUnichars() * 2;
1481  if (max_matches < MAX_MATCHES)
1482  max_matches = MAX_MATCHES;
1483  }
1484 
1485  float best_certainty = -MAX_FLOAT32;
1486  for (int i = 0; i < Results->match.size(); i++) {
1487  const UnicharRating& result = Results->match[i];
1488  bool adapted = result.adapted;
1489  bool current_is_frag = (unicharset.get_fragment(result.unichar_id) != NULL);
1490  if (temp_it.length()+1 == max_matches &&
1491  !contains_nonfrag && current_is_frag) {
1492  continue; // look for a non-fragmented character to fill the
1493  // last spot in Choices if only fragments are present
1494  }
1495  // BlobLength can never be legally 0, this means recognition failed.
1496  // But we must return a classification result because some invoking
1497  // functions (chopper/permuter) do not anticipate a null blob choice.
1498  // So we need to assign a poor, but not infinitely bad score.
1499  if (Results->BlobLength == 0) {
1500  Certainty = -20;
1501  Rating = 100; // should be -certainty * real_blob_length
1502  } else {
1503  Rating = Certainty = (1.0f - result.rating);
1504  Rating *= rating_scale * Results->BlobLength;
1505  Certainty *= -(getDict().certainty_scale);
1506  }
1507  // Adapted results, by their very nature, should have good certainty.
1508  // Those that don't are at best misleading, and often lead to errors,
1509  // so don't accept adapted results that are too far behind the best result,
1510  // whether adapted or static.
1511  // TODO(rays) find some way of automatically tuning these constants.
1512  if (Certainty > best_certainty) {
1513  best_certainty = MIN(Certainty, classify_adapted_pruning_threshold);
1514  } else if (adapted &&
1515  Certainty / classify_adapted_pruning_factor < best_certainty) {
1516  continue; // Don't accept bad adapted results.
1517  }
1518 
1519  float min_xheight, max_xheight, yshift;
1520  denorm.XHeightRange(result.unichar_id, unicharset, box,
1521  &min_xheight, &max_xheight, &yshift);
1522  BLOB_CHOICE* choice =
1523  new BLOB_CHOICE(result.unichar_id, Rating, Certainty,
1525  min_xheight, max_xheight, yshift,
1526  adapted ? BCC_ADAPTED_CLASSIFIER
1528  choice->set_fonts(result.fonts);
1529  temp_it.add_to_end(choice);
1530  contains_nonfrag |= !current_is_frag; // update contains_nonfrag
1531  choices_length++;
1532  if (choices_length >= max_matches) break;
1533  }
1534  Results->match.truncate(choices_length);
1535 } // ConvertMatchesToChoices
int size() const
Definition: genericvector.h:72
void truncate(int size)
float FLOAT32
Definition: host.h:111
inT32 BlobLength
Definition: adaptmatch.cpp:83
GenericVector< UnicharRating > match
Definition: adaptmatch.cpp:88
#define MIN(x, y)
Definition: ndminx.h:28
UNICHARSET unicharset
Definition: ccutil.h:72
double classify_adapted_pruning_factor
Definition: classify.h:441
int MaxNumUnichars() const
Definition: shapetable.cpp:465
const CHAR_FRAGMENT * get_fragment(UNICHAR_ID unichar_id) const
Definition: unicharset.h:682
ShapeTable * shape_table_
Definition: classify.h:512
GenericVector< ScoredFont > fonts
Definition: shapetable.h:88
int get_script(UNICHAR_ID unichar_id) const
Definition: unicharset.h:611
Dict & getDict()
Definition: classify.h:65
double classify_adapted_pruning_threshold
Definition: classify.h:443
void set_fonts(const GenericVector< tesseract::ScoredFont > &fonts)
Definition: ratngs.h:94
#define MAX_FLOAT32
Definition: host.h:124
#define MAX_MATCHES
Definition: adaptmatch.cpp:68
void XHeightRange(int unichar_id, const UNICHARSET &unicharset, const TBOX &bbox, float *min_xht, float *max_xht, float *yshift) const
Definition: normalis.cpp:428
#define NULL
Definition: host.h:144
double certainty_scale
Definition: dict.h:601
void tesseract::Classify::ConvertProto ( PROTO  Proto,
int  ProtoId,
INT_CLASS  Class 
)

Definition at line 518 of file intproto.cpp.

518  {
519 /*
520  ** Parameters:
521  ** Proto floating-pt proto to be converted to integer format
522  ** ProtoId id of proto
523  ** Class integer class to add converted proto to
524  ** Globals: none
525  ** Operation: This routine converts Proto to integer format and
526  ** installs it as ProtoId in Class.
527  ** Return: none
528  ** Exceptions: none
529  ** History: Fri Feb 8 11:22:43 1991, DSJ, Created.
530  */
531  INT_PROTO P;
532  FLOAT32 Param;
533 
534  assert(ProtoId < Class->NumProtos);
535 
536  P = ProtoForProtoId(Class, ProtoId);
537 
538  Param = Proto->A * 128;
539  P->A = TruncateParam(Param, -128, 127, NULL);
540 
541  Param = -Proto->B * 256;
542  P->B = TruncateParam(Param, 0, 255, NULL);
543 
544  Param = Proto->C * 128;
545  P->C = TruncateParam(Param, -128, 127, NULL);
546 
547  Param = Proto->Angle * 256;
548  if (Param < 0 || Param >= 256)
549  P->Angle = 0;
550  else
551  P->Angle = (uinT8) Param;
552 
553  /* round proto length to nearest integer number of pico-features */
554  Param = (Proto->Length / GetPicoFeatureLength()) + 0.5;
555  Class->ProtoLengths[ProtoId] = TruncateParam(Param, 1, 255, NULL);
557  cprintf("Converted ffeat to (A=%d,B=%d,C=%d,L=%d)",
558  P->A, P->B, P->C, Class->ProtoLengths[ProtoId]);
559 } /* ConvertProto */
float FLOAT32
Definition: host.h:111
int classify_learning_debug_level
Definition: classify.h:419
#define ProtoForProtoId(C, P)
Definition: intproto.h:171
FLOAT32 Angle
Definition: protos.h:49
#define GetPicoFeatureLength()
Definition: picofeat.h:59
FLOAT32 B
Definition: protos.h:45
int TruncateParam(FLOAT32 Param, int Min, int Max, char *Id)
Definition: intproto.cpp:1913
FLOAT32 Length
Definition: protos.h:50
FLOAT32 C
Definition: protos.h:46
#define NULL
Definition: host.h:144
void cprintf(const char *format,...)
Definition: callcpp.cpp:40
uinT8 * ProtoLengths
Definition: intproto.h:112
FLOAT32 A
Definition: protos.h:44
unsigned char uinT8
Definition: host.h:99
INT_TEMPLATES tesseract::Classify::CreateIntTemplates ( CLASSES  FloatProtos,
const UNICHARSET & target_unicharset 
)

Definition at line 563 of file intproto.cpp.

565  {
566 /*
567  ** Parameters:
568  ** FloatProtos prototypes in old floating pt format
569  ** Globals: none
570  ** Operation: This routine converts from the old floating point format
571  ** to the new integer format.
572  ** Return: New set of training templates in integer format.
573  ** Exceptions: none
574  ** History: Thu Feb 7 14:40:42 1991, DSJ, Created.
575  */
576  INT_TEMPLATES IntTemplates;
577  CLASS_TYPE FClass;
578  INT_CLASS IClass;
579  int ClassId;
580  int ProtoId;
581  int ConfigId;
582 
583  IntTemplates = NewIntTemplates();
584 
585  for (ClassId = 0; ClassId < target_unicharset.size(); ClassId++) {
586  FClass = &(FloatProtos[ClassId]);
587  if (FClass->NumProtos == 0 && FClass->NumConfigs == 0 &&
588  strcmp(target_unicharset.id_to_unichar(ClassId), " ") != 0) {
589  cprintf("Warning: no protos/configs for %s in CreateIntTemplates()\n",
590  target_unicharset.id_to_unichar(ClassId));
591  }
592  assert(UnusedClassIdIn(IntTemplates, ClassId));
593  IClass = NewIntClass(FClass->NumProtos, FClass->NumConfigs);
594  FontSet fs;
595  fs.size = FClass->font_set.size();
596  fs.configs = new int[fs.size];
597  for (int i = 0; i < fs.size; ++i) {
598  fs.configs[i] = FClass->font_set.get(i);
599  }
600  if (this->fontset_table_.contains(fs)) {
601  IClass->font_set_id = this->fontset_table_.get_id(fs);
602  delete[] fs.configs;
603  } else {
604  IClass->font_set_id = this->fontset_table_.push_back(fs);
605  }
606  AddIntClass(IntTemplates, ClassId, IClass);
607 
608  for (ProtoId = 0; ProtoId < FClass->NumProtos; ProtoId++) {
609  AddIntProto(IClass);
610  ConvertProto(ProtoIn(FClass, ProtoId), ProtoId, IClass);
611  AddProtoToProtoPruner(ProtoIn(FClass, ProtoId), ProtoId, IClass,
613  AddProtoToClassPruner(ProtoIn(FClass, ProtoId), ClassId, IntTemplates);
614  }
615 
616  for (ConfigId = 0; ConfigId < FClass->NumConfigs; ConfigId++) {
617  AddIntConfig(IClass);
618  ConvertConfig(FClass->Configurations[ConfigId], ConfigId, IClass);
619  }
620  }
621  return (IntTemplates);
622 } /* CreateIntTemplates */
void AddProtoToProtoPruner(PROTO Proto, int ProtoId, INT_CLASS Class, bool debug)
Definition: intproto.cpp:381
#define ProtoIn(Class, Pid)
Definition: protos.h:123
inT16 NumConfigs
Definition: protos.h:62
void ConvertProto(PROTO Proto, int ProtoId, INT_CLASS Class)
Definition: intproto.cpp:518
void AddProtoToClassPruner(PROTO Proto, CLASS_ID ClassId, INT_TEMPLATES Templates)
Definition: intproto.cpp:337
INT_CLASS NewIntClass(int MaxNumProtos, int MaxNumConfigs)
Definition: intproto.cpp:673
UnicityTableEqEq< int > font_set
Definition: protos.h:65
inT16 NumProtos
Definition: protos.h:59
int AddIntConfig(INT_CLASS Class)
Definition: intproto.cpp:274
int classify_learning_debug_level
Definition: classify.h:419
int AddIntProto(INT_CLASS Class)
Definition: intproto.cpp:298
void ConvertConfig(BIT_VECTOR Config, int ConfigId, INT_CLASS Class)
Definition: intproto.cpp:484
const char *const id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:266
INT_TEMPLATES NewIntTemplates()
Definition: intproto.cpp:739
void AddIntClass(INT_TEMPLATES Templates, CLASS_ID ClassId, INT_CLASS Class)
Definition: intproto.cpp:241
#define UnusedClassIdIn(T, c)
Definition: intproto.h:180
const T & get(int id) const
Return the object from an id.
int size() const
Return the size used.
UnicityTable< FontSet > fontset_table_
Definition: classify.h:496
void cprintf(const char *format,...)
Definition: callcpp.cpp:40
int size() const
Definition: unicharset.h:297
CONFIGS Configurations
Definition: protos.h:64
void tesseract::Classify::DebugAdaptiveClassifier ( TBLOB * blob,
ADAPT_RESULTS * Results 
)
Parameters
Blob: blob whose classification is being debugged
Results: results of match being debugged

Globals: none

Note
Exceptions: none
History: Wed Mar 13 16:44:41 1991, DSJ, Created.

Definition at line 1550 of file adaptmatch.cpp.

1551  {
1552  if (static_classifier_ == NULL) return;
1553  INT_FX_RESULT_STRUCT fx_info;
1555  TrainingSample* sample =
1556  BlobToTrainingSample(*blob, false, &fx_info, &bl_features);
1557  if (sample == NULL) return;
1558  static_classifier_->DebugDisplay(*sample, blob->denorm().pix(),
1559  Results->best_unichar_id);
1560 } /* DebugAdaptiveClassifier */
virtual void DebugDisplay(const TrainingSample &sample, Pix *page_pix, UNICHAR_ID unichar_id)
UNICHAR_ID best_unichar_id
Definition: adaptmatch.cpp:85
Pix * pix() const
Definition: normalis.h:248
const DENORM & denorm() const
Definition: blobs.h:340
TrainingSample * BlobToTrainingSample(const TBLOB &blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT *fx_info, GenericVector< INT_FEATURE_STRUCT > *bl_features)
Definition: intfx.cpp:81
Definition: cluster.h:32
#define NULL
Definition: host.h:144
void tesseract::Classify::DisplayAdaptedChar ( TBLOB blob,
INT_CLASS_STRUCT int_class 
)

Definition at line 979 of file adaptmatch.cpp.

979  {
980 #ifndef GRAPHICS_DISABLED
981  INT_FX_RESULT_STRUCT fx_info;
983  TrainingSample* sample =
985  &bl_features);
986  if (sample == NULL) return;
987 
988  UnicharRating int_result;
989  im_.Match(int_class, AllProtosOn, AllConfigsOn,
990  bl_features.size(), &bl_features[0],
993  tprintf("Best match to temp config %d = %4.1f%%.\n",
994  int_result.config, int_result.rating * 100.0);
996  uinT32 ConfigMask;
997  ConfigMask = 1 << int_result.config;
999  im_.Match(int_class, AllProtosOn, (BIT_VECTOR)&ConfigMask,
1000  bl_features.size(), &bl_features[0],
1001  &int_result, classify_adapt_feature_threshold,
1002  6 | 0x19, matcher_debug_separate_windows);
1004  }
1005 #endif
1006 }
bool matcher_debug_separate_windows
Definition: classify.h:458
int size() const
Definition: genericvector.h:72
#define tprintf(...)
Definition: tprintf.h:31
BIT_VECTOR AllProtosOn
Definition: classify.h:480
int classify_learning_debug_level
Definition: classify.h:419
bool classify_nonlinear_norm
Definition: classify.h:416
unsigned int uinT32
Definition: host.h:103
void Match(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, inT16 NumFeatures, const INT_FEATURE_STRUCT *Features, tesseract::UnicharRating *Result, int AdaptFeatureThreshold, int Debug, bool SeparateDebugWindows)
Definition: intmatcher.cpp:461
#define NO_DEBUG
Definition: adaptmatch.cpp:70
void UpdateMatchDisplay()
Definition: intproto.cpp:466
TrainingSample * BlobToTrainingSample(const TBLOB &blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT *fx_info, GenericVector< INT_FEATURE_STRUCT > *bl_features)
Definition: intfx.cpp:81
uinT32 * BIT_VECTOR
Definition: bitvec.h:28
Definition: cluster.h:32
IntegerMatcher im_
Definition: classify.h:503
#define NULL
Definition: host.h:144
BIT_VECTOR AllConfigsOn
Definition: classify.h:481
int classify_adapt_feature_threshold
Definition: classify.h:447
void tesseract::Classify::DoAdaptiveMatch ( TBLOB * Blob,
ADAPT_RESULTS * Results 
)

This routine performs an adaptive classification. If we have not yet adapted to enough classes, a simple classification to the pre-trained templates is performed. Otherwise, we match the blob against the adapted templates. If the adapted templates do not match well, we try a match against the pre-trained templates. If an adapted template match is found, we do a match to any pre-trained templates which could be ambiguous. The results from all of these classifications are merged together into Results.

Parameters
Blob: blob to be classified
Results: place to put match results

Globals:

  • PreTrainedTemplates: built-in training templates
  • AdaptedTemplates: templates adapted for this page
  • matcher_reliable_adaptive_result: rating limit for a great match
Note
Exceptions: none
History: Tue Mar 12 08:50:11 1991, DSJ, Created.

Definition at line 1586 of file adaptmatch.cpp.

1586  {
1587  UNICHAR_ID *Ambiguities;
1588 
1589  INT_FX_RESULT_STRUCT fx_info;
1591  TrainingSample* sample =
1593  &bl_features);
1594  if (sample == NULL) return;
1595 
1597  tess_cn_matching) {
1598  CharNormClassifier(Blob, *sample, Results);
1599  } else {
1600  Ambiguities = BaselineClassifier(Blob, bl_features, fx_info,
1601  AdaptedTemplates, Results);
1602  if ((!Results->match.empty() &&
1603  MarginalMatch(Results->best_rating,
1605  !tess_bn_matching) ||
1606  Results->match.empty()) {
1607  CharNormClassifier(Blob, *sample, Results);
1608  } else if (Ambiguities && *Ambiguities >= 0 && !tess_bn_matching) {
1609  AmbigClassifier(bl_features, fx_info, Blob,
1612  Ambiguities,
1613  Results);
1614  }
1615  }
1616 
1617  // Force the blob to be classified as noise
1618  // if the results contain only fragments.
1619  // TODO(daria): verify that this is better than
1620  // just adding a NULL classification.
1621  if (!Results->HasNonfragment || Results->match.empty())
1622  ClassifyAsNoise(Results);
1623  delete sample;
1624 } /* DoAdaptiveMatch */
int CharNormClassifier(TBLOB *blob, const TrainingSample &sample, ADAPT_RESULTS *adapt_results)
GenericVector< UnicharRating > match
Definition: adaptmatch.cpp:88
double matcher_reliable_adaptive_result
Definition: classify.h:421
INT_TEMPLATES PreTrainedTemplates
Definition: classify.h:469
bool classify_nonlinear_norm
Definition: classify.h:416
bool HasNonfragment
Definition: adaptmatch.cpp:84
ADAPT_CLASS Class[MAX_NUM_CLASSES]
Definition: adaptive.h:81
int matcher_permanent_classes_min
Definition: classify.h:426
void AmbigClassifier(const GenericVector< INT_FEATURE_STRUCT > &int_features, const INT_FX_RESULT_STRUCT &fx_info, const TBLOB *blob, INT_TEMPLATES templates, ADAPT_CLASS *classes, UNICHAR_ID *ambiguities, ADAPT_RESULTS *results)
FLOAT32 best_rating
Definition: adaptmatch.cpp:87
int UNICHAR_ID
Definition: unichar.h:33
bool empty() const
Definition: genericvector.h:84
TrainingSample * BlobToTrainingSample(const TBLOB &blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT *fx_info, GenericVector< INT_FEATURE_STRUCT > *bl_features)
Definition: intfx.cpp:81
Definition: cluster.h:32
bool MarginalMatch(float confidence, float matcher_great_threshold)
Definition: adaptmatch.cpp:122
#define NULL
Definition: host.h:144
UNICHAR_ID * BaselineClassifier(TBLOB *Blob, const GenericVector< INT_FEATURE_STRUCT > &int_features, const INT_FX_RESULT_STRUCT &fx_info, ADAPT_TEMPLATES Templates, ADAPT_RESULTS *Results)
void ClassifyAsNoise(ADAPT_RESULTS *Results)
ADAPT_TEMPLATES AdaptedTemplates
Definition: classify.h:473
void tesseract::Classify::EndAdaptiveClassifier ( )

This routine performs cleanup operations on the adaptive classifier. It should be called before the program is terminated. Its main function is to save the adapted templates to a file.

Globals:

Note
Exceptions: none
History: Tue Mar 19 14:37:06 1991, DSJ, Created.

Definition at line 456 of file adaptmatch.cpp.

456  {
457  STRING Filename;
458  FILE *File;
459 
460  if (AdaptedTemplates != NULL &&
462  Filename = imagefile + ADAPT_TEMPLATE_SUFFIX;
463  File = fopen (Filename.string(), "wb");
464  if (File == NULL)
465  cprintf ("Unable to save adapted templates to %s!\n", Filename.string());
466  else {
467  cprintf ("\nSaving adapted templates to %s ...", Filename.string());
468  fflush(stdout);
470  cprintf ("\n");
471  fclose(File);
472  }
473  }
474 
475  if (AdaptedTemplates != NULL) {
478  }
479  if (BackupAdaptedTemplates != NULL) {
482  }
483 
484  if (PreTrainedTemplates != NULL) {
487  }
489  FreeNormProtos();
490  if (AllProtosOn != NULL) {
495  AllProtosOn = NULL;
496  AllConfigsOn = NULL;
499  }
500  delete shape_table_;
501  shape_table_ = NULL;
502  if (static_classifier_ != NULL) {
503  delete static_classifier_;
504  static_classifier_ = NULL;
505  }
506 } /* EndAdaptiveClassifier */
ADAPT_TEMPLATES BackupAdaptedTemplates
Definition: classify.h:477
void EndDangerousAmbigs()
Definition: stopper.cpp:368
bool classify_enable_adaptive_matcher
Definition: classify.h:409
INT_TEMPLATES PreTrainedTemplates
Definition: classify.h:469
bool classify_save_adapted_templates
Definition: classify.h:413
STRING imagefile
Definition: ccutil.h:74
BIT_VECTOR AllProtosOn
Definition: classify.h:480
void WriteAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates)
Definition: adaptive.cpp:505
BIT_VECTOR AllConfigsOff
Definition: classify.h:482
ShapeTable * shape_table_
Definition: classify.h:512
Dict & getDict()
Definition: classify.h:65
#define ADAPT_TEMPLATE_SUFFIX
Definition: adaptmatch.cpp:66
void free_int_templates(INT_TEMPLATES templates)
Definition: intproto.cpp:764
void FreeBitVector(BIT_VECTOR BitVector)
Definition: bitvec.cpp:55
void free_adapted_templates(ADAPT_TEMPLATES templates)
Definition: adaptive.cpp:199
Definition: strngs.h:44
#define NULL
Definition: host.h:144
BIT_VECTOR TempProtoMask
Definition: classify.h:483
void cprintf(const char *format,...)
Definition: callcpp.cpp:40
const char * string() const
Definition: strngs.cpp:193
BIT_VECTOR AllConfigsOn
Definition: classify.h:481
ADAPT_TEMPLATES AdaptedTemplates
Definition: classify.h:473
void tesseract::Classify::ExpandShapesAndApplyCorrections ( ADAPT_CLASS * classes,
bool  debug,
int  class_id,
int  bottom,
int  top,
float  cp_rating,
int  blob_length,
int  matcher_multiplier,
const uinT8 * cn_factors,
UnicharRating * int_result,
ADAPT_RESULTS * final_results 
)

Definition at line 1172 of file adaptmatch.cpp.

1176  {
1177  if (classes != NULL) {
1178  // Adapted result. Convert configs to fontinfo_ids.
1179  int_result->adapted = true;
1180  for (int f = 0; f < int_result->fonts.size(); ++f) {
1181  int_result->fonts[f].fontinfo_id =
1182  GetFontinfoId(classes[class_id], int_result->fonts[f].fontinfo_id);
1183  }
1184  } else {
1185  // Pre-trained result. Map fonts using font_sets_.
1186  int_result->adapted = false;
1187  for (int f = 0; f < int_result->fonts.size(); ++f) {
1188  int_result->fonts[f].fontinfo_id =
1190  int_result->fonts[f].fontinfo_id);
1191  }
1192  if (shape_table_ != NULL) {
1193  // Two possible cases:
1194  // 1. Flat shapetable. All unichar-ids of the shapes referenced by
1195  // int_result->fonts are the same. In this case build a new vector of
1196  // mapped fonts and replace the fonts in int_result.
1197  // 2. Multi-unichar shapetable. Variable unichars in the shapes referenced
1198  // by int_result. In this case, build a vector of UnicharRating to
1199  // gather together different font-ids for each unichar. Also covers case1.
1200  GenericVector<UnicharRating> mapped_results;
1201  for (int f = 0; f < int_result->fonts.size(); ++f) {
1202  int shape_id = int_result->fonts[f].fontinfo_id;
1203  const Shape& shape = shape_table_->GetShape(shape_id);
1204  for (int c = 0; c < shape.size(); ++c) {
1205  int unichar_id = shape[c].unichar_id;
1206  if (!unicharset.get_enabled(unichar_id)) continue;
1207  // Find the mapped_result for unichar_id.
1208  int r = 0;
1209  for (r = 0; r < mapped_results.size() &&
1210  mapped_results[r].unichar_id != unichar_id; ++r) {}
1211  if (r == mapped_results.size()) {
1212  mapped_results.push_back(*int_result);
1213  mapped_results[r].unichar_id = unichar_id;
1214  mapped_results[r].fonts.truncate(0);
1215  }
1216  for (int i = 0; i < shape[c].font_ids.size(); ++i) {
1217  mapped_results[r].fonts.push_back(
1218  ScoredFont(shape[c].font_ids[i], int_result->fonts[f].score));
1219  }
1220  }
1221  }
1222  for (int m = 0; m < mapped_results.size(); ++m) {
1223  mapped_results[m].rating =
1224  ComputeCorrectedRating(debug, mapped_results[m].unichar_id,
1225  cp_rating, int_result->rating,
1226  int_result->feature_misses, bottom, top,
1227  blob_length, matcher_multiplier, cn_factors);
1228  AddNewResult(mapped_results[m], final_results);
1229  }
1230  return;
1231  }
1232  }
1233  if (unicharset.get_enabled(class_id)) {
1234  int_result->rating = ComputeCorrectedRating(debug, class_id, cp_rating,
1235  int_result->rating,
1236  int_result->feature_misses,
1237  bottom, top, blob_length,
1238  matcher_multiplier, cn_factors);
1239  AddNewResult(*int_result, final_results);
1240  }
1241 }
int ClassAndConfigIDToFontOrShapeID(int class_id, int int_result_config) const
int size() const
Definition: genericvector.h:72
void truncate(int size)
void AddNewResult(const UnicharRating &new_result, ADAPT_RESULTS *results)
int push_back(T object)
UNICHARSET unicharset
Definition: ccutil.h:72
ShapeTable * shape_table_
Definition: classify.h:512
double ComputeCorrectedRating(bool debug, int unichar_id, double cp_rating, double im_rating, int feature_misses, int bottom, int top, int blob_length, int matcher_multiplier, const uinT8 *cn_factors)
GenericVector< ScoredFont > fonts
Definition: shapetable.h:88
int GetFontinfoId(ADAPT_CLASS Class, uinT8 ConfigId)
Definition: adaptive.cpp:190
bool get_enabled(UNICHAR_ID unichar_id) const
Definition: unicharset.h:826
#define NULL
Definition: host.h:144
const Shape & GetShape(int shape_id) const
Definition: shapetable.h:323
void tesseract::Classify::ExtractFeatures ( const TBLOB & blob,
bool  nonlinear_norm,
GenericVector< INT_FEATURE_STRUCT > *  bl_features,
GenericVector< INT_FEATURE_STRUCT > *  cn_features,
INT_FX_RESULT_STRUCT * results,
GenericVector< int > *  outline_cn_counts 
)
static

Definition at line 445 of file intfx.cpp.

450  {
451  DENORM bl_denorm, cn_denorm;
452  tesseract::Classify::SetupBLCNDenorms(blob, nonlinear_norm,
453  &bl_denorm, &cn_denorm, results);
454  if (outline_cn_counts != NULL)
455  outline_cn_counts->truncate(0);
456  // Iterate the outlines.
457  for (TESSLINE* ol = blob.outlines; ol != NULL; ol = ol->next) {
458  // Iterate the polygon.
459  EDGEPT* loop_pt = ol->FindBestStartPt();
460  EDGEPT* pt = loop_pt;
461  if (pt == NULL) continue;
462  do {
463  if (pt->IsHidden()) continue;
464  // Find a run of equal src_outline.
465  EDGEPT* last_pt = pt;
466  do {
467  last_pt = last_pt->next;
468  } while (last_pt != loop_pt && !last_pt->IsHidden() &&
469  last_pt->src_outline == pt->src_outline);
470  last_pt = last_pt->prev;
471  // Until the adaptive classifier can be weaned off polygon segments,
472  // we have to force extraction from the polygon for the bl_features.
473  ExtractFeaturesFromRun(pt, last_pt, bl_denorm, kStandardFeatureLength,
474  true, bl_features);
475  ExtractFeaturesFromRun(pt, last_pt, cn_denorm, kStandardFeatureLength,
476  false, cn_features);
477  pt = last_pt;
478  } while ((pt = pt->next) != loop_pt);
479  if (outline_cn_counts != NULL)
480  outline_cn_counts->push_back(cn_features->size());
481  }
482  results->NumBL = bl_features->size();
483  results->NumCN = cn_features->size();
484  results->YBottom = blob.bounding_box().bottom();
485  results->YTop = blob.bounding_box().top();
486  results->Width = blob.bounding_box().width();
487 }
int size() const
Definition: genericvector.h:72
void truncate(int size)
int push_back(T object)
EDGEPT * prev
Definition: blobs.h:170
EDGEPT * next
Definition: blobs.h:169
bool IsHidden() const
Definition: blobs.h:153
static void SetupBLCNDenorms(const TBLOB &blob, bool nonlinear_norm, DENORM *bl_denorm, DENORM *cn_denorm, INT_FX_RESULT_STRUCT *fx_info)
Definition: intfx.cpp:133
inT16 bottom() const
Definition: rect.h:61
C_OUTLINE * src_outline
Definition: blobs.h:171
inT16 width() const
Definition: rect.h:111
Definition: blobs.h:76
#define NULL
Definition: host.h:144
TBOX bounding_box() const
Definition: blobs.cpp:482
TESSLINE * outlines
Definition: blobs.h:377
const double kStandardFeatureLength
Definition: intfx.h:46
inT16 top() const
Definition: rect.h:54
FEATURE_SET tesseract::Classify::ExtractIntCNFeatures ( const TBLOB & blob,
const INT_FX_RESULT_STRUCT & fx_info 
)

Definition at line 228 of file picofeat.cpp.

229  {
230 /*
231  ** Parameters:
232  ** blob blob to extract features from
233  ** denorm normalization/denormalization parameters.
234  ** Return: Integer character-normalized features for blob.
235  ** Exceptions: none
236  ** History: 8/8/2011, rays, Created.
237  */
238  INT_FX_RESULT_STRUCT local_fx_info(fx_info);
241  blob, false, &local_fx_info, &bl_features);
242  if (sample == NULL) return NULL;
243 
244  int num_features = sample->num_features();
245  const INT_FEATURE_STRUCT* features = sample->features();
246  FEATURE_SET feature_set = NewFeatureSet(num_features);
247  for (int f = 0; f < num_features; ++f) {
248  FEATURE feature = NewFeature(&IntFeatDesc);
249 
250  feature->Params[IntX] = features[f].X;
251  feature->Params[IntY] = features[f].Y;
252  feature->Params[IntDir] = features[f].Theta;
253  AddFeature(feature_set, feature);
254  }
255  delete sample;
256 
257  return feature_set;
258 } /* ExtractIntCNFeatures */
const INT_FEATURE_STRUCT * features() const
FEATURE NewFeature(const FEATURE_DESC_STRUCT *FeatureDesc)
FEATURE_SET NewFeatureSet(int NumFeatures)
Definition: picofeat.h:29
const FEATURE_DESC_STRUCT IntFeatDesc
FLOAT32 Params[1]
Definition: ocrfeatures.h:65
TrainingSample * BlobToTrainingSample(const TBLOB &blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT *fx_info, GenericVector< INT_FEATURE_STRUCT > *bl_features)
Definition: intfx.cpp:81
BOOL8 AddFeature(FEATURE_SET FeatureSet, FEATURE Feature)
Definition: ocrfeatures.cpp:35
Definition: cluster.h:32
Definition: picofeat.h:30
#define NULL
Definition: host.h:144
FEATURE_SET tesseract::Classify::ExtractIntGeoFeatures ( const TBLOB & blob,
const INT_FX_RESULT_STRUCT & fx_info 
)

Definition at line 261 of file picofeat.cpp.

262  {
263 /*
264  ** Parameters:
265  ** blob blob to extract features from
266  ** denorm normalization/denormalization parameters.
267  ** Return: Geometric (top/bottom/width) features for blob.
268  ** Exceptions: none
269  ** History: 8/8/2011, rays, Created.
270  */
271  INT_FX_RESULT_STRUCT local_fx_info(fx_info);
274  blob, false, &local_fx_info, &bl_features);
275  if (sample == NULL) return NULL;
276 
277  FEATURE_SET feature_set = NewFeatureSet(1);
278  FEATURE feature = NewFeature(&IntFeatDesc);
279 
280  feature->Params[GeoBottom] = sample->geo_feature(GeoBottom);
281  feature->Params[GeoTop] = sample->geo_feature(GeoTop);
282  feature->Params[GeoWidth] = sample->geo_feature(GeoWidth);
283  AddFeature(feature_set, feature);
284  delete sample;
285 
286  return feature_set;
287 } /* ExtractIntGeoFeatures */
FEATURE NewFeature(const FEATURE_DESC_STRUCT *FeatureDesc)
FEATURE_SET NewFeatureSet(int NumFeatures)
const FEATURE_DESC_STRUCT IntFeatDesc
FLOAT32 Params[1]
Definition: ocrfeatures.h:65
TrainingSample * BlobToTrainingSample(const TBLOB &blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT *fx_info, GenericVector< INT_FEATURE_STRUCT > *bl_features)
Definition: intfx.cpp:81
BOOL8 AddFeature(FEATURE_SET FeatureSet, FEATURE Feature)
Definition: ocrfeatures.cpp:35
Definition: cluster.h:32
#define NULL
Definition: host.h:144
int geo_feature(int index) const
FEATURE_SET tesseract::Classify::ExtractOutlineFeatures ( TBLOB * Blob)

Definition at line 36 of file outfeat.cpp.

36  {
37 /*
38  ** Parameters:
39  ** Blob blob to extract pico-features from
40  ** LineStats statistics on text row blob is in
41  ** Globals: none
42  ** Operation: Convert each segment in the outline to a feature
43  ** and return the features.
44  ** Return: Outline-features for Blob.
45  ** Exceptions: none
46  ** History: 11/13/90, DSJ, Created.
47  ** 05/24/91, DSJ, Updated for either char or baseline normalize.
48  */
49  LIST Outlines;
50  LIST RemainingOutlines;
51  MFOUTLINE Outline;
52  FEATURE_SET FeatureSet;
53  FLOAT32 XScale, YScale;
54 
55  FeatureSet = NewFeatureSet (MAX_OUTLINE_FEATURES);
56  if (Blob == NULL)
57  return (FeatureSet);
58 
59  Outlines = ConvertBlob (Blob);
60 
61  NormalizeOutlines(Outlines, &XScale, &YScale);
62  RemainingOutlines = Outlines;
63  iterate(RemainingOutlines) {
64  Outline = (MFOUTLINE) first_node (RemainingOutlines);
65  ConvertToOutlineFeatures(Outline, FeatureSet);
66  }
68  NormalizeOutlineX(FeatureSet);
69  FreeOutlines(Outlines);
70  return (FeatureSet);
71 } /* ExtractOutlineFeatures */
#define first_node(l)
Definition: oldlist.h:139
float FLOAT32
Definition: host.h:111
FEATURE_SET NewFeatureSet(int NumFeatures)
#define MAX_OUTLINE_FEATURES
Definition: outfeat.h:35
#define iterate(l)
Definition: oldlist.h:159
void ConvertToOutlineFeatures(MFOUTLINE Outline, FEATURE_SET FeatureSet)
Definition: outfeat.cpp:111
void NormalizeOutlineX(FEATURE_SET FeatureSet)
Definition: outfeat.cpp:155
LIST ConvertBlob(TBLOB *blob)
Definition: mfoutline.cpp:39
void FreeOutlines(LIST Outlines)
Definition: mfoutline.cpp:175
#define NULL
Definition: host.h:144
LIST MFOUTLINE
Definition: mfoutline.h:33
void NormalizeOutlines(LIST Outlines, FLOAT32 *XScale, FLOAT32 *YScale)
Definition: mfoutline.cpp:295
FEATURE_SET tesseract::Classify::ExtractPicoFeatures ( TBLOB * Blob)

Definition at line 57 of file picofeat.cpp.

57  {
58 /*
59  ** Parameters:
60  ** Blob blob to extract pico-features from
61  ** LineStats statistics on text row blob is in
62  ** Globals:
63  ** classify_norm_method normalization method currently specified
64  ** Operation: Dummy for now.
65  ** Return: Pico-features for Blob.
66  ** Exceptions: none
67  ** History: 9/4/90, DSJ, Created.
68  */
69  LIST Outlines;
70  LIST RemainingOutlines;
71  MFOUTLINE Outline;
72  FEATURE_SET FeatureSet;
73  FLOAT32 XScale, YScale;
74 
75  FeatureSet = NewFeatureSet(MAX_PICO_FEATURES);
76  Outlines = ConvertBlob(Blob);
77  NormalizeOutlines(Outlines, &XScale, &YScale);
78  RemainingOutlines = Outlines;
79  iterate(RemainingOutlines) {
80  Outline = (MFOUTLINE) first_node (RemainingOutlines);
81  ConvertToPicoFeatures2(Outline, FeatureSet);
82  }
84  NormalizePicoX(FeatureSet);
85  FreeOutlines(Outlines);
86  return (FeatureSet);
87 
88 } /* ExtractPicoFeatures */
#define first_node(l)
Definition: oldlist.h:139
float FLOAT32
Definition: host.h:111
void NormalizePicoX(FEATURE_SET FeatureSet)
Definition: picofeat.cpp:197
FEATURE_SET NewFeatureSet(int NumFeatures)
#define iterate(l)
Definition: oldlist.h:159
void ConvertToPicoFeatures2(MFOUTLINE Outline, FEATURE_SET FeatureSet)
Definition: picofeat.cpp:151
#define MAX_PICO_FEATURES
Definition: picofeat.h:47
LIST ConvertBlob(TBLOB *blob)
Definition: mfoutline.cpp:39
void FreeOutlines(LIST Outlines)
Definition: mfoutline.cpp:175
LIST MFOUTLINE
Definition: mfoutline.h:33
void NormalizeOutlines(LIST Outlines, FLOAT32 *XScale, FLOAT32 *YScale)
Definition: mfoutline.cpp:295
void tesseract::Classify::FreeNormProtos ( )

Definition at line 161 of file normmatch.cpp.

161  {
162  if (NormProtos != NULL) {
163  for (int i = 0; i < NormProtos->NumProtos; i++)
167  Efree(NormProtos);
168  NormProtos = NULL;
169  }
170 }
PARAM_DESC * ParamDesc
Definition: normmatch.cpp:41
LIST * Protos
Definition: normmatch.cpp:42
void FreeProtoList(LIST *ProtoList)
Definition: cluster.cpp:564
void Efree(void *ptr)
Definition: emalloc.cpp:85
#define NULL
Definition: host.h:144
NORM_PROTOS * NormProtos
Definition: classify.h:486
UnicityTable<FontInfo>& tesseract::Classify::get_fontinfo_table ( )
inline

Definition at line 345 of file classify.h.

345  {
346  return fontinfo_table_;
347  }
UnicityTable< FontInfo > fontinfo_table_
Definition: classify.h:488
const UnicityTable<FontInfo>& tesseract::Classify::get_fontinfo_table ( ) const
inline

Definition at line 348 of file classify.h.

348  {
349  return fontinfo_table_;
350  }
UnicityTable< FontInfo > fontinfo_table_
Definition: classify.h:488
UnicityTable<FontSet>& tesseract::Classify::get_fontset_table ( )
inline

Definition at line 351 of file classify.h.

351  {
352  return fontset_table_;
353  }
UnicityTable< FontSet > fontset_table_
Definition: classify.h:496
int tesseract::Classify::GetAdaptiveFeatures ( TBLOB * Blob,
INT_FEATURE_ARRAY  IntFeatures,
FEATURE_SET * FloatFeatures 
)

This routine sets up the feature extractor to extract baseline normalized pico-features.

The extracted pico-features are converted to integer form and placed in IntFeatures. The original floating-pt. features are returned in FloatFeatures.

Globals: none

Parameters
Blob: blob to extract features from
[out] IntFeatures: array to fill with integer features
[out] FloatFeatures: place to return actual floating-pt features
Returns
Number of pico-features returned (0 if an error occurred)
Note
Exceptions: none
History: Tue Mar 12 17:55:18 1991, DSJ, Created.

Definition at line 812 of file adaptmatch.cpp.

814  {
815  FEATURE_SET Features;
816  int NumFeatures;
817 
818  classify_norm_method.set_value(baseline);
819  Features = ExtractPicoFeatures(Blob);
820 
821  NumFeatures = Features->NumFeatures;
822  if (NumFeatures > UNLIKELY_NUM_FEAT) {
823  FreeFeatureSet(Features);
824  return 0;
825  }
826 
827  ComputeIntFeatures(Features, IntFeatures);
828  *FloatFeatures = Features;
829 
830  return NumFeatures;
831 } /* GetAdaptiveFeatures */
FEATURE_SET ExtractPicoFeatures(TBLOB *Blob)
Definition: picofeat.cpp:57
#define UNLIKELY_NUM_FEAT
Definition: adaptmatch.cpp:69
void ComputeIntFeatures(FEATURE_SET Features, INT_FEATURE_ARRAY IntFeatures)
Definition: float2int.cpp:100
void FreeFeatureSet(FEATURE_SET FeatureSet)
Definition: ocrfeatures.cpp:79
UNICHAR_ID * tesseract::Classify::GetAmbiguities ( TBLOB * Blob,
CLASS_ID  CorrectClass 
)

This routine matches blob to the built-in templates to find out if there are any classes other than the correct class which are potential ambiguities.

Parameters
Blob: blob to get classification ambiguities for
CorrectClass: correct class for Blob

Globals:

  • CurrentRatings used by qsort compare routine
  • PreTrainedTemplates built-in templates
Returns
String containing all possible ambiguous classes.
Note
Exceptions: none
History: Fri Mar 15 08:08:22 1991, DSJ, Created.

Definition at line 1643 of file adaptmatch.cpp.

1644  {
1645  ADAPT_RESULTS *Results = new ADAPT_RESULTS();
1646  UNICHAR_ID *Ambiguities;
1647  int i;
1648 
1649  Results->Initialize();
1650  INT_FX_RESULT_STRUCT fx_info;
1652  TrainingSample* sample =
1654  &bl_features);
1655  if (sample == NULL) {
1656  delete Results;
1657  return NULL;
1658  }
1659 
1660  CharNormClassifier(Blob, *sample, Results);
1661  delete sample;
1662  RemoveBadMatches(Results);
1664 
1665  /* copy the class id's into an string of ambiguities - don't copy if
1666  the correct class is the only class id matched */
1667  Ambiguities = new UNICHAR_ID[Results->match.size() + 1];
1668  if (Results->match.size() > 1 ||
1669  (Results->match.size() == 1 &&
1670  Results->match[0].unichar_id != CorrectClass)) {
1671  for (i = 0; i < Results->match.size(); i++)
1672  Ambiguities[i] = Results->match[i].unichar_id;
1673  Ambiguities[i] = -1;
1674  } else {
1675  Ambiguities[0] = -1;
1676  }
1677 
1678  delete Results;
1679  return Ambiguities;
1680 } /* GetAmbiguities */
int size() const
Definition: genericvector.h:72
int CharNormClassifier(TBLOB *blob, const TrainingSample &sample, ADAPT_RESULTS *adapt_results)
GenericVector< UnicharRating > match
Definition: adaptmatch.cpp:88
bool classify_nonlinear_norm
Definition: classify.h:416
static int SortDescendingRating(const void *t1, const void *t2)
Definition: shapetable.h:56
void RemoveBadMatches(ADAPT_RESULTS *Results)
int UNICHAR_ID
Definition: unichar.h:33
void Initialize()
Definition: adaptmatch.cpp:93
TrainingSample * BlobToTrainingSample(const TBLOB &blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT *fx_info, GenericVector< INT_FEATURE_STRUCT > *bl_features)
Definition: intfx.cpp:81
Definition: cluster.h:32
#define NULL
Definition: host.h:144
int tesseract::Classify::GetCharNormFeature ( const INT_FX_RESULT_STRUCT & fx_info,
INT_TEMPLATES  templates,
uinT8 * pruner_norm_array,
uinT8 * char_norm_array 
)

This routine calls the integer (Hardware) feature extractor if it has not been called before for this blob.

The results from the feature extractor are placed into globals so that they can be used in other routines without re-extracting the features.

It then copies the char norm features into the IntFeatures array provided by the caller.

Parameters
Blob: blob to extract features from
Templates: used to compute char norm adjustments
IntFeatures: array to fill with integer features
PrunerNormArray: array of factors from blob normalization process
CharNormArray: array to fill with dummy char norm adjustments
BlobLength: length of blob in baseline-normalized units

Globals:

Returns
Number of features extracted or 0 if an error occurred.
Note
Exceptions: none
History: Tue May 28 10:40:52 1991, DSJ, Created.

Definition at line 1733 of file adaptmatch.cpp.

1736  {
1737  FEATURE norm_feature = NewFeature(&CharNormDesc);
1738  float baseline = kBlnBaselineOffset;
1739  float scale = MF_SCALE_FACTOR;
1740  norm_feature->Params[CharNormY] = (fx_info.Ymean - baseline) * scale;
1741  norm_feature->Params[CharNormLength] =
1742  fx_info.Length * scale / LENGTH_COMPRESSION;
1743  norm_feature->Params[CharNormRx] = fx_info.Rx * scale;
1744  norm_feature->Params[CharNormRy] = fx_info.Ry * scale;
1745  // Deletes norm_feature.
1746  ComputeCharNormArrays(norm_feature, templates, char_norm_array,
1747  pruner_norm_array);
1748  return IntCastRounded(fx_info.Length / kStandardFeatureLength);
1749 } /* GetCharNormFeature */
void ComputeCharNormArrays(FEATURE_STRUCT *norm_feature, INT_TEMPLATES_STRUCT *templates, uinT8 *char_norm_array, uinT8 *pruner_array)
FEATURE NewFeature(const FEATURE_DESC_STRUCT *FeatureDesc)
#define LENGTH_COMPRESSION
Definition: normfeat.h:26
const FEATURE_DESC_STRUCT CharNormDesc
const int kBlnBaselineOffset
Definition: normalis.h:29
FLOAT32 Params[1]
Definition: ocrfeatures.h:65
int IntCastRounded(double x)
Definition: helpers.h:172
const double kStandardFeatureLength
Definition: intfx.h:46
#define MF_SCALE_FACTOR
Definition: mfoutline.h:63
CLASS_ID tesseract::Classify::GetClassToDebug ( const char *  Prompt,
bool *  adaptive_on,
bool *  pretrained_on,
int *  shape_id 
)

Definition at line 1422 of file intproto.cpp.

1423  {
1424 /*
1425  ** Parameters:
1426  ** Prompt prompt to print while waiting for input from window
1427  ** Globals: none
1428  ** Operation: This routine prompts the user with Prompt and waits
1429  ** for the user to enter something in the debug window.
1430  ** Return: Character entered in the debug window.
1431  ** Exceptions: none
1432  ** History: Thu Mar 21 16:55:13 1991, DSJ, Created.
1433  */
1434  tprintf("%s\n", Prompt);
1435  SVEvent* ev;
1436  SVEventType ev_type;
1437  int unichar_id = INVALID_UNICHAR_ID;
1438  // Wait until a click or popup event.
1439  do {
1441  ev_type = ev->type;
1442  if (ev_type == SVET_POPUP) {
1443  if (ev->command_id == IDA_SHAPE_INDEX) {
1444  if (shape_table_ != NULL) {
1445  *shape_id = atoi(ev->parameter);
1446  *adaptive_on = false;
1447  *pretrained_on = true;
1448  if (*shape_id >= 0 && *shape_id < shape_table_->NumShapes()) {
1449  int font_id;
1450  shape_table_->GetFirstUnicharAndFont(*shape_id, &unichar_id,
1451  &font_id);
1452  tprintf("Shape %d, first unichar=%d, font=%d\n",
1453  *shape_id, unichar_id, font_id);
1454  return unichar_id;
1455  }
1456  tprintf("Shape index '%s' not found in shape table\n", ev->parameter);
1457  } else {
1458  tprintf("No shape table loaded!\n");
1459  }
1460  } else {
1462  unichar_id = unicharset.unichar_to_id(ev->parameter);
1463  if (ev->command_id == IDA_ADAPTIVE) {
1464  *adaptive_on = true;
1465  *pretrained_on = false;
1466  *shape_id = -1;
1467  } else if (ev->command_id == IDA_STATIC) {
1468  *adaptive_on = false;
1469  *pretrained_on = true;
1470  } else {
1471  *adaptive_on = true;
1472  *pretrained_on = true;
1473  }
1474  if (ev->command_id == IDA_ADAPTIVE || shape_table_ == NULL) {
1475  *shape_id = -1;
1476  return unichar_id;
1477  }
1478  for (int s = 0; s < shape_table_->NumShapes(); ++s) {
1479  if (shape_table_->GetShape(s).ContainsUnichar(unichar_id)) {
1480  tprintf("%s\n", shape_table_->DebugStr(s).string());
1481  }
1482  }
1483  } else {
1484  tprintf("Char class '%s' not found in unicharset",
1485  ev->parameter);
1486  }
1487  }
1488  }
1489  delete ev;
1490  } while (ev_type != SVET_CLICK);
1491  return 0;
1492 } /* GetClassToDebug */
const UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:194
#define tprintf(...)
Definition: tprintf.h:31
UNICHARSET unicharset
Definition: ccutil.h:72
int command_id
Definition: scrollview.h:70
bool ContainsUnichar(int unichar_id) const
Definition: shapetable.cpp:156
SVEventType
Definition: scrollview.h:45
ShapeTable * shape_table_
Definition: classify.h:512
ScrollView * IntMatchWindow
Definition: intproto.cpp:181
SVEvent * AwaitEvent(SVEventType type)
Definition: scrollview.cpp:449
char * parameter
Definition: scrollview.h:71
STRING DebugStr(int shape_id) const
Definition: shapetable.cpp:291
SVEventType type
Definition: scrollview.h:64
bool contains_unichar(const char *const unichar_repr) const
Definition: unicharset.cpp:644
#define NULL
Definition: host.h:144
void GetFirstUnicharAndFont(int shape_id, int *unichar_id, int *font_id) const
Definition: shapetable.cpp:414
const char * string() const
Definition: strngs.cpp:193
const Shape & GetShape(int shape_id) const
Definition: shapetable.h:323
int NumShapes() const
Definition: shapetable.h:278
Dict& tesseract::Classify::getDict ( )
inline

Definition at line 65 of file classify.h.

65  {
66  return dict_;
67  }
int tesseract::Classify::GetFontinfoId ( ADAPT_CLASS  Class,
uinT8  ConfigId 
)

Definition at line 190 of file adaptive.cpp.

190  {
191  return (ConfigIsPermanent(Class, ConfigId) ?
192  PermConfigFor(Class, ConfigId)->FontinfoId :
193  TempConfigFor(Class, ConfigId)->FontinfoId);
194 }
#define PermConfigFor(Class, ConfigId)
Definition: adaptive.h:105
#define ConfigIsPermanent(Class, ConfigId)
Definition: adaptive.h:93
#define TempConfigFor(Class, ConfigId)
Definition: adaptive.h:102
void tesseract::Classify::InitAdaptedClass ( TBLOB * Blob,
CLASS_ID  ClassId,
int  FontinfoId,
ADAPT_CLASS  Class,
ADAPT_TEMPLATES  Templates 
)

This routine creates a new adapted class and uses Blob as the model for the first config in that class.

Parameters
Blob: blob to model new class after
ClassId: id of the class to be initialized
FontinfoId: font information inferred from pre-trained templates
Class: adapted class to be initialized
Templates: adapted templates to add new class to

Globals:

Note
Exceptions: none
History: Thu Mar 14 12:49:39 1991, DSJ, Created.

Definition at line 717 of file adaptmatch.cpp.

721  {
722  FEATURE_SET Features;
723  int Fid, Pid;
724  FEATURE Feature;
725  int NumFeatures;
726  TEMP_PROTO TempProto;
727  PROTO Proto;
728  INT_CLASS IClass;
730 
731  classify_norm_method.set_value(baseline);
732  Features = ExtractOutlineFeatures(Blob);
733  NumFeatures = Features->NumFeatures;
734  if (NumFeatures > UNLIKELY_NUM_FEAT || NumFeatures <= 0) {
735  FreeFeatureSet(Features);
736  return;
737  }
738 
739  Config = NewTempConfig(NumFeatures - 1, FontinfoId);
740  TempConfigFor(Class, 0) = Config;
741 
742  /* this is a kludge to construct cutoffs for adapted templates */
743  if (Templates == AdaptedTemplates)
744  BaselineCutoffs[ClassId] = CharNormCutoffs[ClassId];
745 
746  IClass = ClassForClassId (Templates->Templates, ClassId);
747 
748  for (Fid = 0; Fid < Features->NumFeatures; Fid++) {
749  Pid = AddIntProto (IClass);
750  assert (Pid != NO_PROTO);
751 
752  Feature = Features->Features[Fid];
753  TempProto = NewTempProto ();
754  Proto = &(TempProto->Proto);
755 
756  /* compute proto params - NOTE that Y_DIM_OFFSET must be used because
757  ConvertProto assumes that the Y dimension varies from -0.5 to 0.5
758  instead of the -0.25 to 0.75 used in baseline normalization */
759  Proto->Angle = Feature->Params[OutlineFeatDir];
760  Proto->X = Feature->Params[OutlineFeatX];
761  Proto->Y = Feature->Params[OutlineFeatY] - Y_DIM_OFFSET;
762  Proto->Length = Feature->Params[OutlineFeatLength];
763  FillABC(Proto);
764 
765  TempProto->ProtoId = Pid;
766  SET_BIT (Config->Protos, Pid);
767 
768  ConvertProto(Proto, Pid, IClass);
769  AddProtoToProtoPruner(Proto, Pid, IClass,
771 
772  Class->TempProtos = push (Class->TempProtos, TempProto);
773  }
774  FreeFeatureSet(Features);
775 
776  AddIntConfig(IClass);
777  ConvertConfig (AllProtosOn, 0, IClass);
778 
780  tprintf("Added new class '%s' with class id %d and %d protos.\n",
781  unicharset.id_to_unichar(ClassId), ClassId, NumFeatures);
783  DisplayAdaptedChar(Blob, IClass);
784  }
785 
786  if (IsEmptyAdaptedClass(Class))
787  (Templates->NumNonEmptyClasses)++;
788 } /* InitAdaptedClass */
void AddProtoToProtoPruner(PROTO Proto, int ProtoId, INT_CLASS Class, bool debug)
Definition: intproto.cpp:381
#define tprintf(...)
Definition: tprintf.h:31
TEMP_PROTO NewTempProto()
Definition: adaptive.cpp:254
UNICHARSET unicharset
Definition: ccutil.h:72
TEMP_CONFIG NewTempConfig(int MaxProtoId, int FontinfoId)
Definition: adaptive.cpp:223
uinT16 ProtoId
Definition: adaptive.h:30
void ConvertProto(PROTO Proto, int ProtoId, INT_CLASS Class)
Definition: intproto.cpp:518
#define IsEmptyAdaptedClass(Class)
Definition: adaptive.h:90
CLUSTERCONFIG Config
FEATURE Features[1]
Definition: ocrfeatures.h:72
BIT_VECTOR AllProtosOn
Definition: classify.h:480
int AddIntConfig(INT_CLASS Class)
Definition: intproto.cpp:274
int classify_learning_debug_level
Definition: classify.h:419
#define UNLIKELY_NUM_FEAT
Definition: adaptmatch.cpp:69
int AddIntProto(INT_CLASS Class)
Definition: intproto.cpp:298
FLOAT32 X
Definition: protos.h:47
PROTO_STRUCT Proto
Definition: adaptive.h:32
FLOAT32 Angle
Definition: protos.h:49
void ConvertConfig(BIT_VECTOR Config, int ConfigId, INT_CLASS Class)
Definition: intproto.cpp:484
const char *const id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:266
#define TempConfigFor(Class, ConfigId)
Definition: adaptive.h:102
#define NO_PROTO
Definition: matchdefs.h:42
void DisplayAdaptedChar(TBLOB *blob, INT_CLASS_STRUCT *int_class)
Definition: adaptmatch.cpp:979
#define ClassForClassId(T, c)
Definition: intproto.h:181
INT_TEMPLATES Templates
Definition: adaptive.h:77
FLOAT32 Params[1]
Definition: ocrfeatures.h:65
#define Y_DIM_OFFSET
Definition: adaptmatch.cpp:75
void FillABC(PROTO Proto)
Definition: protos.cpp:198
FLOAT32 Length
Definition: protos.h:50
FEATURE_SET ExtractOutlineFeatures(TBLOB *Blob)
Definition: outfeat.cpp:36
#define SET_BIT(array, bit)
Definition: bitvec.h:57
LIST push(LIST list, void *element)
Definition: oldlist.cpp:323
BIT_VECTOR Protos
Definition: adaptive.h:45
void FreeFeatureSet(FEATURE_SET FeatureSet)
Definition: ocrfeatures.cpp:79
ADAPT_TEMPLATES AdaptedTemplates
Definition: classify.h:473
FLOAT32 Y
Definition: protos.h:48
void tesseract::Classify::InitAdaptiveClassifier ( bool  load_pre_trained_templates)

This routine reads in the training information needed by the adaptive classifier and saves it into global variables. Parameters: load_pre_trained_templates indicates whether the pre-trained templates (inttemp, normproto and pffmtable components) should be loaded; it should only be set to true if the necessary classifier components are present in the [lang].traineddata file. Globals: BuiltInTemplatesFile (file to get built-in temps from); BuiltInCutoffsFile (file to get avg. feat per class from); classify_use_pre_adapted_templates (enables use of pre-adapted templates).

Note
History: Mon Mar 11 12:49:34 1991, DSJ, Created.

Definition at line 527 of file adaptmatch.cpp.

527  {
529  return;
530  if (AllProtosOn != NULL)
531  EndAdaptiveClassifier(); // Don't leak with multiple inits.
532 
533  // If there is no language_data_path_prefix, the classifier will be
534  // adaptive only.
535  if (language_data_path_prefix.length() > 0 &&
536  load_pre_trained_templates) {
540  if (tessdata_manager.DebugLevel() > 0) tprintf("Loaded inttemp\n");
541 
543  shape_table_ = new ShapeTable(unicharset);
546  tprintf("Error loading shape table!\n");
547  delete shape_table_;
548  shape_table_ = NULL;
549  } else if (tessdata_manager.DebugLevel() > 0) {
550  tprintf("Successfully loaded shape table!\n");
551  }
552  }
553 
558  CharNormCutoffs);
559  if (tessdata_manager.DebugLevel() > 0) tprintf("Loaded pffmtable\n");
560 
562  NormProtos =
565  if (tessdata_manager.DebugLevel() > 0) tprintf("Loaded normproto\n");
566  static_classifier_ = new TessClassifier(false, this);
567  }
568 
570  InitIntegerFX();
571 
579 
580  for (int i = 0; i < MAX_NUM_CLASSES; i++) {
581  BaselineCutoffs[i] = 0;
582  }
583 
585  FILE *File;
586  STRING Filename;
587 
588  Filename = imagefile;
589  Filename += ADAPT_TEMPLATE_SUFFIX;
590  File = fopen(Filename.string(), "rb");
591  if (File == NULL) {
593  } else {
594  cprintf("\nReading pre-adapted templates from %s ...\n",
595  Filename.string());
596  fflush(stdout);
598  cprintf("\n");
599  fclose(File);
601 
602  for (int i = 0; i < AdaptedTemplates->Templates->NumClasses; i++) {
603  BaselineCutoffs[i] = CharNormCutoffs[i];
604  }
605  }
606  } else {
607  if (AdaptedTemplates != NULL)
610  }
611 } /* InitAdaptiveClassifier */
FILE * GetDataFilePtr() const
#define set_all_bits(array, length)
Definition: bitvec.h:41
void ReadNewCutoffs(FILE *CutoffFile, bool swap, inT64 end_offset, CLASS_CUTOFF_ARRAY Cutoffs)
Definition: cutoffs.cpp:42
#define MAX_NUM_CONFIGS
Definition: intproto.h:46
bool classify_enable_adaptive_matcher
Definition: classify.h:409
#define MAX_NUM_CLASSES
Definition: matchdefs.h:31
ADAPT_TEMPLATES NewAdaptedTemplates(bool InitFromUnicharset)
Definition: adaptive.cpp:167
#define tprintf(...)
Definition: tprintf.h:31
UNICHARSET unicharset
Definition: ccutil.h:72
INT_TEMPLATES PreTrainedTemplates
Definition: classify.h:469
ADAPT_TEMPLATES ReadAdaptedTemplates(FILE *File)
Definition: adaptive.cpp:369
inT64 GetEndOffset(TessdataType tessdata_type) const
inT32 length() const
Definition: strngs.cpp:188
bool DeSerialize(bool swap, FILE *fp)
Definition: shapetable.cpp:256
STRING imagefile
Definition: ccutil.h:74
TessdataManager tessdata_manager
Definition: ccutil.h:71
BIT_VECTOR AllProtosOn
Definition: classify.h:480
#define ASSERT_HOST(x)
Definition: errcode.h:84
BIT_VECTOR NewBitVector(int NumBits)
Definition: bitvec.cpp:90
BIT_VECTOR AllConfigsOff
Definition: classify.h:482
ShapeTable * shape_table_
Definition: classify.h:512
bool classify_use_pre_adapted_templates
Definition: classify.h:411
void Init(tesseract::IntParam *classify_debug_level)
Definition: intmatcher.cpp:697
INT_TEMPLATES ReadIntTemplates(FILE *File)
Definition: intproto.cpp:776
#define zero_all_bits(array, length)
Definition: bitvec.h:33
#define ADAPT_TEMPLATE_SUFFIX
Definition: adaptmatch.cpp:66
STRING language_data_path_prefix
Definition: ccutil.h:70
#define WordsInVectorOfSize(NumBits)
Definition: bitvec.h:63
INT_TEMPLATES Templates
Definition: adaptive.h:77
void PrintAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates)
Definition: adaptive.cpp:273
bool SeekToStart(TessdataType tessdata_type)
void InitIntegerFX()
Definition: intfx.cpp:55
IntegerMatcher im_
Definition: classify.h:503
void free_adapted_templates(ADAPT_TEMPLATES templates)
Definition: adaptive.cpp:199
void EndAdaptiveClassifier()
Definition: adaptmatch.cpp:456
Definition: strngs.h:44
#define NULL
Definition: host.h:144
BIT_VECTOR TempProtoMask
Definition: classify.h:483
#define MAX_NUM_PROTOS
Definition: intproto.h:47
void cprintf(const char *format,...)
Definition: callcpp.cpp:40
const char * string() const
Definition: strngs.cpp:193
BIT_VECTOR AllConfigsOn
Definition: classify.h:481
ADAPT_TEMPLATES AdaptedTemplates
Definition: classify.h:473
NORM_PROTOS * ReadNormProtos(FILE *File, inT64 end_offset)
Definition: normmatch.cpp:234
NORM_PROTOS * NormProtos
Definition: classify.h:486
bool tesseract::Classify::LargeSpeckle ( const TBLOB & blob)

Definition at line 235 of file classify.cpp.

235  {
236  double speckle_size = kBlnXHeight * speckle_large_max_size;
237  TBOX bbox = blob.bounding_box();
238  return bbox.width() < speckle_size && bbox.height() < speckle_size;
239 }
const int kBlnXHeight
Definition: normalis.h:28
double speckle_large_max_size
Definition: classify.h:501
inT16 height() const
Definition: rect.h:104
inT16 width() const
Definition: rect.h:111
Definition: rect.h:30
TBOX bounding_box() const
Definition: blobs.cpp:482
void tesseract::Classify::LearnBlob ( const STRING & fontname,
TBLOB * Blob,
const DENORM & cn_denorm,
const INT_FX_RESULT_STRUCT & fx_info,
const char *  blob_text 
)

Definition at line 69 of file blobclass.cpp.

72  {
74  CharDesc->FeatureSets[0] = ExtractMicros(blob, cn_denorm);
75  CharDesc->FeatureSets[1] = ExtractCharNormFeatures(fx_info);
76  CharDesc->FeatureSets[2] = ExtractIntCNFeatures(*blob, fx_info);
77  CharDesc->FeatureSets[3] = ExtractIntGeoFeatures(*blob, fx_info);
78 
79  if (ValidCharDescription(feature_defs_, CharDesc)) {
80  // Label the features with a class name and font name.
81  tr_file_data_ += "\n";
82  tr_file_data_ += fontname;
83  tr_file_data_ += " ";
84  tr_file_data_ += blob_text;
85  tr_file_data_ += "\n";
86 
87  // write micro-features to file and clean up
88  WriteCharDescription(feature_defs_, CharDesc, &tr_file_data_);
89  } else {
90  tprintf("Blob learned was invalid!\n");
91  }
92  FreeCharDescription(CharDesc);
93 } // LearnBlob
FEATURE_SET ExtractCharNormFeatures(const INT_FX_RESULT_STRUCT &fx_info)
Definition: normfeat.cpp:62
void FreeCharDescription(CHAR_DESC CharDesc)
Definition: featdefs.cpp:141
#define tprintf(...)
Definition: tprintf.h:31
FEATURE_SET FeatureSets[NUM_FEATURE_TYPES]
Definition: featdefs.h:44
void WriteCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs, CHAR_DESC CharDesc, STRING *str)
Definition: featdefs.cpp:197
FEATURE_SET ExtractMicros(TBLOB *Blob, const DENORM &cn_denorm)
Definition: mf.cpp:36
bool ValidCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs, CHAR_DESC CharDesc)
Definition: featdefs.cpp:219
FEATURE_SET ExtractIntGeoFeatures(const TBLOB &blob, const INT_FX_RESULT_STRUCT &fx_info)
Definition: picofeat.cpp:261
FEATURE_DEFS_STRUCT feature_defs_
Definition: classify.h:507
CHAR_DESC NewCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs)
Definition: featdefs.cpp:164
FEATURE_SET ExtractIntCNFeatures(const TBLOB &blob, const INT_FX_RESULT_STRUCT &fx_info)
Definition: picofeat.cpp:228
void tesseract::Classify::LearnPieces ( const char *  fontname,
int  start,
int  length,
float  threshold,
CharSegmentationType  segmentation,
const char *  correct_text,
WERD_RES * word 
)

Definition at line 368 of file adaptmatch.cpp.

370  {
371  // TODO(daria) Remove/modify this if/when we want
372  // to train and/or adapt to n-grams.
373  if (segmentation != CST_WHOLE &&
374  (segmentation != CST_FRAGMENT || disable_character_fragments))
375  return;
376 
377  if (length > 1) {
378  SEAM::JoinPieces(word->seam_array, word->chopped_word->blobs, start,
379  start + length - 1);
380  }
381  TBLOB* blob = word->chopped_word->blobs[start];
382  // Rotate the blob if needed for classification.
383  TBLOB* rotated_blob = blob->ClassifyNormalizeIfNeeded();
384  if (rotated_blob == NULL)
385  rotated_blob = blob;
386 
387  #ifndef GRAPHICS_DISABLED
388  // Draw debug windows showing the blob that is being learned if needed.
389  if (strcmp(classify_learn_debug_str.string(), correct_text) == 0) {
390  RefreshDebugWindow(&learn_debug_win_, "LearnPieces", 600,
391  word->chopped_word->bounding_box());
392  rotated_blob->plot(learn_debug_win_, ScrollView::GREEN, ScrollView::BROWN);
393  learn_debug_win_->Update();
394  window_wait(learn_debug_win_);
395  }
396  if (classify_debug_character_fragments && segmentation == CST_FRAGMENT) {
397  ASSERT_HOST(learn_fragments_debug_win_ != NULL); // set up in LearnWord
398  blob->plot(learn_fragments_debug_win_,
400  learn_fragments_debug_win_->Update();
401  }
402  #endif // GRAPHICS_DISABLED
403 
404  if (fontname != NULL) {
405  classify_norm_method.set_value(character); // force char norm spc 30/11/93
406  tess_bn_matching.set_value(false); // turn it off
407  tess_cn_matching.set_value(false);
408  DENORM bl_denorm, cn_denorm;
409  INT_FX_RESULT_STRUCT fx_info;
411  &bl_denorm, &cn_denorm, &fx_info);
412  LearnBlob(fontname, rotated_blob, cn_denorm, fx_info, correct_text);
413  } else if (unicharset.contains_unichar(correct_text)) {
414  UNICHAR_ID class_id = unicharset.unichar_to_id(correct_text);
415  int font_id = word->fontinfo != NULL
416  ? fontinfo_table_.get_id(*word->fontinfo)
417  : 0;
419  tprintf("Adapting to char = %s, thr= %g font_id= %d\n",
420  unicharset.id_to_unichar(class_id), threshold, font_id);
421  // If filename is not NULL we are doing recognition
422  // (as opposed to training), so we must have already set word fonts.
423  AdaptToChar(rotated_blob, class_id, font_id, threshold, AdaptedTemplates);
424  if (BackupAdaptedTemplates != NULL) {
425  // Adapt the backup templates too. They will be used if the primary gets
426  // too full.
427  AdaptToChar(rotated_blob, class_id, font_id, threshold,
429  }
430  } else if (classify_debug_level >= 1) {
431  tprintf("Can't adapt to %s not in unicharset\n", correct_text);
432  }
433  if (rotated_blob != blob) {
434  delete rotated_blob;
435  }
436 
437  SEAM::BreakPieces(word->seam_array, word->chopped_word->blobs, start,
438  start + length - 1);
439 } // LearnPieces.
Definition: blobs.h:261
ADAPT_TEMPLATES BackupAdaptedTemplates
Definition: classify.h:477
const UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:194
TWERD * chopped_word
Definition: pageres.h:201
static void Update()
Definition: scrollview.cpp:715
#define tprintf(...)
Definition: tprintf.h:31
UNICHARSET unicharset
Definition: ccutil.h:72
const FontInfo * fontinfo
Definition: pageres.h:288
UnicityTable< FontInfo > fontinfo_table_
Definition: classify.h:488
int classify_learning_debug_level
Definition: classify.h:419
#define ASSERT_HOST(x)
Definition: errcode.h:84
void plot(ScrollView *window, ScrollView::Color color, ScrollView::Color child_color)
Definition: blobs.cpp:524
bool classify_nonlinear_norm
Definition: classify.h:416
char window_wait(ScrollView *win)
Definition: callcpp.cpp:111
char * classify_learn_debug_str
Definition: classify.h:459
const char *const id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:266
void AdaptToChar(TBLOB *Blob, CLASS_ID ClassId, int FontinfoId, FLOAT32 Threshold, ADAPT_TEMPLATES adaptive_templates)
Definition: adaptmatch.cpp:887
GenericVector< SEAM * > seam_array
Definition: pageres.h:203
int UNICHAR_ID
Definition: unichar.h:33
TBLOB * ClassifyNormalizeIfNeeded() const
Definition: blobs.cpp:363
static void JoinPieces(const GenericVector< SEAM * > &seams, const GenericVector< TBLOB * > &blobs, int first, int last)
Definition: seam.cpp:216
static void SetupBLCNDenorms(const TBLOB &blob, bool nonlinear_norm, DENORM *bl_denorm, DENORM *cn_denorm, INT_FX_RESULT_STRUCT *fx_info)
Definition: intfx.cpp:133
TBOX bounding_box() const
Definition: blobs.cpp:881
bool classify_debug_character_fragments
Definition: classify.h:455
GenericVector< TBLOB * > blobs
Definition: blobs.h:436
void LearnBlob(const STRING &fontname, TBLOB *Blob, const DENORM &cn_denorm, const INT_FX_RESULT_STRUCT &fx_info, const char *blob_text)
Definition: blobclass.cpp:69
void RefreshDebugWindow(ScrollView **win, const char *msg, int y_offset, const TBOX &wbox)
Definition: adaptmatch.cpp:220
bool contains_unichar(const char *const unichar_repr) const
Definition: unicharset.cpp:644
bool disable_character_fragments
Definition: classify.h:450
#define NULL
Definition: host.h:144
static void BreakPieces(const GenericVector< SEAM * > &seams, const GenericVector< TBLOB * > &blobs, int first, int last)
Definition: seam.cpp:194
ADAPT_TEMPLATES AdaptedTemplates
Definition: classify.h:473
void tesseract::Classify::LearnWord ( const char *  fontname,
WERD_RES word 
)

Definition at line 244 of file adaptmatch.cpp.

244  {
245  int word_len = word->correct_text.size();
246  if (word_len == 0) return;
247 
248  float* thresholds = NULL;
249  if (fontname == NULL) {
250  // Adaption mode.
251  if (!EnableLearning || word->best_choice == NULL)
252  return; // Can't or won't adapt.
253 
255  tprintf("\n\nAdapting to word = %s\n",
256  word->best_choice->debug_string().string());
257  thresholds = new float[word_len];
261  matcher_rating_margin, thresholds);
262  }
263  int start_blob = 0;
264 
265  #ifndef GRAPHICS_DISABLED
267  if (learn_fragmented_word_debug_win_ != NULL) {
268  window_wait(learn_fragmented_word_debug_win_);
269  }
270  RefreshDebugWindow(&learn_fragments_debug_win_, "LearnPieces", 400,
271  word->chopped_word->bounding_box());
272  RefreshDebugWindow(&learn_fragmented_word_debug_win_, "LearnWord", 200,
273  word->chopped_word->bounding_box());
274  word->chopped_word->plot(learn_fragmented_word_debug_win_);
276  }
277  #endif // GRAPHICS_DISABLED
278 
279  for (int ch = 0; ch < word_len; ++ch) {
281  tprintf("\nLearning %s\n", word->correct_text[ch].string());
282  }
283  if (word->correct_text[ch].length() > 0) {
284  float threshold = thresholds != NULL ? thresholds[ch] : 0.0f;
285 
286  LearnPieces(fontname, start_blob, word->best_state[ch], threshold,
287  CST_WHOLE, word->correct_text[ch].string(), word);
288 
289  if (word->best_state[ch] > 1 && !disable_character_fragments) {
290  // Check that the character breaks into meaningful fragments
291  // that each match a whole character with at least
292  // classify_character_fragments_garbage_certainty_threshold
293  bool garbage = false;
294  int frag;
295  for (frag = 0; frag < word->best_state[ch]; ++frag) {
296  TBLOB* frag_blob = word->chopped_word->blobs[start_blob + frag];
298  garbage |= LooksLikeGarbage(frag_blob);
299  }
300  }
301  // Learn the fragments.
302  if (!garbage) {
303  bool pieces_all_natural = word->PiecesAllNatural(start_blob,
304  word->best_state[ch]);
305  if (pieces_all_natural || !prioritize_division) {
306  for (frag = 0; frag < word->best_state[ch]; ++frag) {
307  GenericVector<STRING> tokens;
308  word->correct_text[ch].split(' ', &tokens);
309 
310  tokens[0] = CHAR_FRAGMENT::to_string(
311  tokens[0].string(), frag, word->best_state[ch],
312  pieces_all_natural);
313 
314  STRING full_string;
315  for (int i = 0; i < tokens.size(); i++) {
316  full_string += tokens[i];
317  if (i != tokens.size() - 1)
318  full_string += ' ';
319  }
320  LearnPieces(fontname, start_blob + frag, 1, threshold,
321  CST_FRAGMENT, full_string.string(), word);
322  }
323  }
324  }
325  }
326 
327  // TODO(rays): re-enable this part of the code when we switch to the
328  // new classifier that needs to see examples of garbage.
329  /*
330  if (word->best_state[ch] > 1) {
331  // If the next blob is good, make junk with the rightmost fragment.
332  if (ch + 1 < word_len && word->correct_text[ch + 1].length() > 0) {
333  LearnPieces(fontname, start_blob + word->best_state[ch] - 1,
334  word->best_state[ch + 1] + 1,
335  threshold, CST_IMPROPER, INVALID_UNICHAR, word);
336  }
337  // If the previous blob is good, make junk with the leftmost fragment.
338  if (ch > 0 && word->correct_text[ch - 1].length() > 0) {
339  LearnPieces(fontname, start_blob - word->best_state[ch - 1],
340  word->best_state[ch - 1] + 1,
341  threshold, CST_IMPROPER, INVALID_UNICHAR, word);
342  }
343  }
344  // If the next blob is good, make a join with it.
345  if (ch + 1 < word_len && word->correct_text[ch + 1].length() > 0) {
346  STRING joined_text = word->correct_text[ch];
347  joined_text += word->correct_text[ch + 1];
348  LearnPieces(fontname, start_blob,
349  word->best_state[ch] + word->best_state[ch + 1],
350  threshold, CST_NGRAM, joined_text.string(), word);
351  }
352  */
353  }
354  start_blob += word->best_state[ch];
355  }
356  delete [] thresholds;
357 } // LearnWord.
Definition: blobs.h:261
int size() const
Definition: genericvector.h:72
int length() const
Definition: genericvector.h:79
void ComputeAdaptionThresholds(float certainty_scale, float min_rating, float max_rating, float rating_margin, float *thresholds)
Definition: pageres.cpp:553
WERD_CHOICE * best_choice
Definition: pageres.h:219
TWERD * chopped_word
Definition: pageres.h:201
static void Update()
Definition: scrollview.cpp:715
#define tprintf(...)
Definition: tprintf.h:31
double matcher_good_threshold
Definition: classify.h:420
bool prioritize_division
Definition: classify.h:387
GenericVector< STRING > correct_text
Definition: pageres.h:259
double classify_character_fragments_garbage_certainty_threshold
Definition: classify.h:453
int classify_learning_debug_level
Definition: classify.h:419
double matcher_perfect_threshold
Definition: classify.h:422
double matcher_rating_margin
Definition: classify.h:424
void plot(ScrollView *window)
Definition: blobs.cpp:918
char window_wait(ScrollView *win)
Definition: callcpp.cpp:111
void LearnPieces(const char *fontname, int start, int length, float threshold, CharSegmentationType segmentation, const char *correct_text, WERD_RES *word)
Definition: adaptmatch.cpp:368
double certainty_scale
Definition: classify.h:437
STRING to_string() const
Definition: unicharset.h:73
const STRING debug_string() const
Definition: ratngs.h:502
bool PiecesAllNatural(int start, int count) const
Definition: pageres.cpp:1072
TBOX bounding_box() const
Definition: blobs.cpp:881
bool classify_debug_character_fragments
Definition: classify.h:455
GenericVector< TBLOB * > blobs
Definition: blobs.h:436
bool LooksLikeGarbage(TBLOB *blob)
void RefreshDebugWindow(ScrollView **win, const char *msg, int y_offset, const TBOX &wbox)
Definition: adaptmatch.cpp:220
bool disable_character_fragments
Definition: classify.h:450
Definition: strngs.h:44
GenericVector< int > best_state
Definition: pageres.h:255
#define NULL
Definition: host.h:144
const char * string() const
Definition: strngs.cpp:193
bool tesseract::Classify::LooksLikeGarbage ( TBLOB blob)

Definition at line 1684 of file adaptmatch.cpp.

1684  {
1685  BLOB_CHOICE_LIST *ratings = new BLOB_CHOICE_LIST();
1686  AdaptiveClassifier(blob, ratings);
1687  BLOB_CHOICE_IT ratings_it(ratings);
1690  print_ratings_list("======================\nLooksLikeGarbage() got ",
1691  ratings, unicharset);
1692  }
1693  for (ratings_it.mark_cycle_pt(); !ratings_it.cycled_list();
1694  ratings_it.forward()) {
1695  if (unicharset.get_fragment(ratings_it.data()->unichar_id()) != NULL) {
1696  continue;
1697  }
1698  float certainty = ratings_it.data()->certainty();
1699  delete ratings;
1700  return certainty <
1702  }
1703  delete ratings;
1704  return true; // no whole characters in ratings
1705 }
UNICHARSET unicharset
Definition: ccutil.h:72
double classify_character_fragments_garbage_certainty_threshold
Definition: classify.h:453
void print_ratings_list(const char *msg, BLOB_CHOICE_LIST *ratings, const UNICHARSET &current_unicharset)
Definition: ratngs.cpp:819
const CHAR_FRAGMENT * get_fragment(UNICHAR_ID unichar_id) const
Definition: unicharset.h:682
void AdaptiveClassifier(TBLOB *Blob, BLOB_CHOICE_LIST *Choices)
Definition: adaptmatch.cpp:185
Dict & getDict()
Definition: classify.h:65
bool classify_debug_character_fragments
Definition: classify.h:455
const UNICHARSET & getUnicharset() const
Definition: dict.h:96
#define NULL
Definition: host.h:144
int tesseract::Classify::MakeNewTemporaryConfig ( ADAPT_TEMPLATES  Templates,
CLASS_ID  ClassId,
int  FontinfoId,
int  NumFeatures,
INT_FEATURE_ARRAY  Features,
FEATURE_SET  FloatFeatures 
)
Parameters
Templates: adapted templates to add new config to
ClassId: class id to associate with new config
FontinfoId: font information inferred from pre-trained templates
NumFeatures: number of features in Features
Features: integer features describing model for new config
FloatFeatures: floating-pt representation of features
Returns
The id of the new config created, a negative integer in case of error.
Note
Exceptions: none
History: Fri Mar 15 08:49:46 1991, DSJ, Created.

Definition at line 1797 of file adaptmatch.cpp.

1802  {
1803  INT_CLASS IClass;
1804  ADAPT_CLASS Class;
1805  PROTO_ID OldProtos[MAX_NUM_PROTOS];
1806  FEATURE_ID BadFeatures[MAX_NUM_INT_FEATURES];
1807  int NumOldProtos;
1808  int NumBadFeatures;
1809  int MaxProtoId, OldMaxProtoId;
1810  int BlobLength = 0;
1811  int MaskSize;
1812  int ConfigId;
1814  int i;
1815  int debug_level = NO_DEBUG;
1816 
1818  debug_level =
1820 
1821  IClass = ClassForClassId(Templates->Templates, ClassId);
1822  Class = Templates->Class[ClassId];
1823 
1824  if (IClass->NumConfigs >= MAX_NUM_CONFIGS) {
1825  ++NumAdaptationsFailed;
1827  cprintf("Cannot make new temporary config: maximum number exceeded.\n");
1828  return -1;
1829  }
1830 
1831  OldMaxProtoId = IClass->NumProtos - 1;
1832 
1833  NumOldProtos = im_.FindGoodProtos(IClass, AllProtosOn, AllConfigsOff,
1834  BlobLength, NumFeatures, Features,
1835  OldProtos, classify_adapt_proto_threshold,
1836  debug_level);
1837 
1838  MaskSize = WordsInVectorOfSize(MAX_NUM_PROTOS);
1839  zero_all_bits(TempProtoMask, MaskSize);
1840  for (i = 0; i < NumOldProtos; i++)
1841  SET_BIT(TempProtoMask, OldProtos[i]);
1842 
1843  NumBadFeatures = im_.FindBadFeatures(IClass, TempProtoMask, AllConfigsOn,
1844  BlobLength, NumFeatures, Features,
1845  BadFeatures,
1847  debug_level);
1848 
1849  MaxProtoId = MakeNewTempProtos(FloatFeatures, NumBadFeatures, BadFeatures,
1850  IClass, Class, TempProtoMask);
1851  if (MaxProtoId == NO_PROTO) {
1852  ++NumAdaptationsFailed;
1854  cprintf("Cannot make new temp protos: maximum number exceeded.\n");
1855  return -1;
1856  }
1857 
1858  ConfigId = AddIntConfig(IClass);
1859  ConvertConfig(TempProtoMask, ConfigId, IClass);
1860  Config = NewTempConfig(MaxProtoId, FontinfoId);
1861  TempConfigFor(Class, ConfigId) = Config;
1863 
1865  cprintf("Making new temp config %d fontinfo id %d"
1866  " using %d old and %d new protos.\n",
1867  ConfigId, Config->FontinfoId,
1868  NumOldProtos, MaxProtoId - OldMaxProtoId);
1869 
1870  return ConfigId;
1871 } /* MakeNewTemporaryConfig */
#define PRINT_PROTO_MATCHES
Definition: intproto.h:194
#define PRINT_MATCH_SUMMARY
Definition: intproto.h:190
int FindBadFeatures(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, uinT16 BlobLength, inT16 NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_ID *FeatureArray, int AdaptFeatureThreshold, int Debug)
Definition: intmatcher.cpp:626
#define MAX_NUM_CONFIGS
Definition: intproto.h:46
TEMP_CONFIG NewTempConfig(int MaxProtoId, int FontinfoId)
Definition: adaptive.cpp:223
CLUSTERCONFIG Config
uinT8 ProtoVectorSize
Definition: adaptive.h:42
BIT_VECTOR AllProtosOn
Definition: classify.h:480
int AddIntConfig(INT_CLASS Class)
Definition: intproto.cpp:274
int classify_learning_debug_level
Definition: classify.h:419
uinT8 FEATURE_ID
Definition: matchdefs.h:47
ADAPT_CLASS Class[MAX_NUM_CLASSES]
Definition: adaptive.h:81
BIT_VECTOR AllConfigsOff
Definition: classify.h:482
int classify_adapt_proto_threshold
Definition: classify.h:445
void ConvertConfig(BIT_VECTOR Config, int ConfigId, INT_CLASS Class)
Definition: intproto.cpp:484
#define TempConfigFor(Class, ConfigId)
Definition: adaptive.h:102
PROTO_ID MakeNewTempProtos(FEATURE_SET Features, int NumBadFeat, FEATURE_ID BadFeat[], INT_CLASS IClass, ADAPT_CLASS Class, BIT_VECTOR TempProtoMask)
#define NO_DEBUG
Definition: adaptmatch.cpp:70
#define MAX_NUM_INT_FEATURES
Definition: intproto.h:132
#define zero_all_bits(array, length)
Definition: bitvec.h:33
#define NO_PROTO
Definition: matchdefs.h:42
#define WordsInVectorOfSize(NumBits)
Definition: bitvec.h:63
#define ClassForClassId(T, c)
Definition: intproto.h:181
#define copy_all_bits(source, dest, length)
Definition: bitvec.h:49
INT_TEMPLATES Templates
Definition: adaptive.h:77
#define PRINT_FEATURE_MATCHES
Definition: intproto.h:193
IntegerMatcher im_
Definition: classify.h:503
uinT8 NumConfigs
Definition: intproto.h:110
#define SET_BIT(array, bit)
Definition: bitvec.h:57
int FindGoodProtos(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, uinT16 BlobLength, inT16 NumFeatures, INT_FEATURE_ARRAY Features, PROTO_ID *ProtoArray, int AdaptProtoThreshold, int Debug)
Definition: intmatcher.cpp:547
inT16 PROTO_ID
Definition: matchdefs.h:41
BIT_VECTOR TempProtoMask
Definition: classify.h:483
#define MAX_NUM_PROTOS
Definition: intproto.h:47
void cprintf(const char *format,...)
Definition: callcpp.cpp:40
BIT_VECTOR Protos
Definition: adaptive.h:45
BIT_VECTOR AllConfigsOn
Definition: classify.h:481
int classify_adapt_feature_threshold
Definition: classify.h:447
uinT16 NumProtos
Definition: intproto.h:108
PROTO_ID tesseract::Classify::MakeNewTempProtos ( FEATURE_SET  Features,
int  NumBadFeat,
FEATURE_ID  BadFeat[],
INT_CLASS  IClass,
ADAPT_CLASS  Class,
BIT_VECTOR  TempProtoMask 
)

This routine finds sets of sequential bad features that all have the same angle and converts each set into a new temporary proto. The temp proto is added to the proto pruner for IClass, pushed onto the list of temp protos in Class, and added to TempProtoMask.

Parameters
Features: floating-pt features describing new character
NumBadFeat: number of bad features to turn into protos
BadFeat: feature id's of bad features
IClass: integer class templates to add new protos to
Class: adapted class templates to add new protos to
TempProtoMask: proto mask to add new protos to

Globals: none

Returns
Max proto id in class after all protos have been added, or NO_PROTO if AddIntProto could not add a new proto to IClass.
Note
Exceptions: none
History: Fri Mar 15 11:39:38 1991, DSJ, Created.

Definition at line 1894 of file adaptmatch.cpp.

1899  {
1900  FEATURE_ID *ProtoStart;
1901  FEATURE_ID *ProtoEnd;
1902  FEATURE_ID *LastBad;
1903  TEMP_PROTO TempProto;
1904  PROTO Proto;
1905  FEATURE F1, F2;
1906  FLOAT32 X1, X2, Y1, Y2;
1907  FLOAT32 A1, A2, AngleDelta;
1908  FLOAT32 SegmentLength;
1909  PROTO_ID Pid;
1910 
1911  for (ProtoStart = BadFeat, LastBad = ProtoStart + NumBadFeat;
1912  ProtoStart < LastBad; ProtoStart = ProtoEnd) {
1913  F1 = Features->Features[*ProtoStart];
1914  X1 = F1->Params[PicoFeatX];
1915  Y1 = F1->Params[PicoFeatY];
1916  A1 = F1->Params[PicoFeatDir];
1917 
1918  for (ProtoEnd = ProtoStart + 1,
1919  SegmentLength = GetPicoFeatureLength();
1920  ProtoEnd < LastBad;
1921  ProtoEnd++, SegmentLength += GetPicoFeatureLength()) {
1922  F2 = Features->Features[*ProtoEnd];
1923  X2 = F2->Params[PicoFeatX];
1924  Y2 = F2->Params[PicoFeatY];
1925  A2 = F2->Params[PicoFeatDir];
1926 
1927  AngleDelta = fabs(A1 - A2);
1928  if (AngleDelta > 0.5)
1929  AngleDelta = 1.0 - AngleDelta;
1930 
1931  if (AngleDelta > matcher_clustering_max_angle_delta ||
1932  fabs(X1 - X2) > SegmentLength ||
1933  fabs(Y1 - Y2) > SegmentLength)
1934  break;
1935  }
1936 
1937  F2 = Features->Features[*(ProtoEnd - 1)];
1938  X2 = F2->Params[PicoFeatX];
1939  Y2 = F2->Params[PicoFeatY];
1940  A2 = F2->Params[PicoFeatDir];
1941 
1942  Pid = AddIntProto(IClass);
1943  if (Pid == NO_PROTO)
1944  return (NO_PROTO);
1945 
1946  TempProto = NewTempProto();
1947  Proto = &(TempProto->Proto);
1948 
1949  /* compute proto params - NOTE that Y_DIM_OFFSET must be used because
1950  ConvertProto assumes that the Y dimension varies from -0.5 to 0.5
1951  instead of the -0.25 to 0.75 used in baseline normalization */
1952  Proto->Length = SegmentLength;
1953  Proto->Angle = A1;
1954  Proto->X = (X1 + X2) / 2.0;
1955  Proto->Y = (Y1 + Y2) / 2.0 - Y_DIM_OFFSET;
1956  FillABC(Proto);
1957 
1958  TempProto->ProtoId = Pid;
1959  SET_BIT(TempProtoMask, Pid);
1960 
1961  ConvertProto(Proto, Pid, IClass);
1962  AddProtoToProtoPruner(Proto, Pid, IClass,
1964 
1965  Class->TempProtos = push(Class->TempProtos, TempProto);
1966  }
1967  return IClass->NumProtos - 1;
1968 } /* MakeNewTempProtos */
void AddProtoToProtoPruner(PROTO Proto, int ProtoId, INT_CLASS Class, bool debug)
Definition: intproto.cpp:381
float FLOAT32
Definition: host.h:111
TEMP_PROTO NewTempProto()
Definition: adaptive.cpp:254
uinT16 ProtoId
Definition: adaptive.h:30
void ConvertProto(PROTO Proto, int ProtoId, INT_CLASS Class)
Definition: intproto.cpp:518
FEATURE Features[1]
Definition: ocrfeatures.h:72
int classify_learning_debug_level
Definition: classify.h:419
uinT8 FEATURE_ID
Definition: matchdefs.h:47
int AddIntProto(INT_CLASS Class)
Definition: intproto.cpp:298
FLOAT32 X
Definition: protos.h:47
PROTO_STRUCT Proto
Definition: adaptive.h:32
FLOAT32 Angle
Definition: protos.h:49
#define GetPicoFeatureLength()
Definition: picofeat.h:59
#define NO_PROTO
Definition: matchdefs.h:42
FLOAT32 Params[1]
Definition: ocrfeatures.h:65
#define Y_DIM_OFFSET
Definition: adaptmatch.cpp:75
void FillABC(PROTO Proto)
Definition: protos.cpp:198
FLOAT32 Length
Definition: protos.h:50
#define SET_BIT(array, bit)
Definition: bitvec.h:57
LIST push(LIST list, void *element)
Definition: oldlist.cpp:323
inT16 PROTO_ID
Definition: matchdefs.h:41
BIT_VECTOR TempProtoMask
Definition: classify.h:483
double matcher_clustering_max_angle_delta
Definition: classify.h:432
uinT16 NumProtos
Definition: intproto.h:108
FLOAT32 Y
Definition: protos.h:48
void tesseract::Classify::MakePermanent ( ADAPT_TEMPLATES  Templates,
CLASS_ID  ClassId,
int  ConfigId,
TBLOB Blob 
)
Parameters
Templates: current set of adaptive templates
ClassId: class containing config to be made permanent
ConfigId: config to be made permanent
Blob: current blob being adapted to

Globals: none

Note
Exceptions: none
History: Thu Mar 14 15:54:08 1991, DSJ, Created.

Definition at line 1983 of file adaptmatch.cpp.

1986  {
1987  UNICHAR_ID *Ambigs;
1989  ADAPT_CLASS Class;
1990  PROTO_KEY ProtoKey;
1991 
1992  Class = Templates->Class[ClassId];
1993  Config = TempConfigFor(Class, ConfigId);
1994 
1995  MakeConfigPermanent(Class, ConfigId);
1996  if (Class->NumPermConfigs == 0)
1997  Templates->NumPermClasses++;
1998  Class->NumPermConfigs++;
1999 
2000  // Initialize permanent config.
2001  Ambigs = GetAmbiguities(Blob, ClassId);
2003  "PERM_CONFIG_STRUCT");
2004  Perm->Ambigs = Ambigs;
2005  Perm->FontinfoId = Config->FontinfoId;
2006 
2007  // Free memory associated with temporary config (since ADAPTED_CONFIG
2008  // is a union we need to clean up before we record permanent config).
2009  ProtoKey.Templates = Templates;
2010  ProtoKey.ClassId = ClassId;
2011  ProtoKey.ConfigId = ConfigId;
2012  Class->TempProtos = delete_d(Class->TempProtos, &ProtoKey, MakeTempProtoPerm);
2013  FreeTempConfig(Config);
2014 
2015  // Record permanent config.
2016  PermConfigFor(Class, ConfigId) = Perm;
2017 
2018  if (classify_learning_debug_level >= 1) {
2019  tprintf("Making config %d for %s (ClassId %d) permanent:"
2020  " fontinfo id %d, ambiguities '",
2021  ConfigId, getDict().getUnicharset().debug_str(ClassId).string(),
2022  ClassId, PermConfigFor(Class, ConfigId)->FontinfoId);
2023  for (UNICHAR_ID *AmbigsPointer = Ambigs;
2024  *AmbigsPointer >= 0; ++AmbigsPointer)
2025  tprintf("%s", unicharset.id_to_unichar(*AmbigsPointer));
2026  tprintf("'.\n");
2027  }
2028 } /* MakePermanent */
int MakeTempProtoPerm(void *item1, void *item2)
#define PermConfigFor(Class, ConfigId)
Definition: adaptive.h:105
ADAPT_TEMPLATES Templates
Definition: adaptmatch.cpp:114
CLASS_ID ClassId
Definition: adaptmatch.cpp:115
#define tprintf(...)
Definition: tprintf.h:31
UNICHARSET unicharset
Definition: ccutil.h:72
PERM_CONFIG_STRUCT * PERM_CONFIG
Definition: adaptive.h:55
CLUSTERCONFIG Config
UNICHAR_ID * GetAmbiguities(TBLOB *Blob, CLASS_ID CorrectClass)
int classify_learning_debug_level
Definition: classify.h:419
ADAPT_CLASS Class[MAX_NUM_CLASSES]
Definition: adaptive.h:81
uinT8 NumPermConfigs
Definition: adaptive.h:65
const char *const id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:266
#define TempConfigFor(Class, ConfigId)
Definition: adaptive.h:102
LIST delete_d(LIST list, void *key, int_compare is_equal)
Definition: oldlist.cpp:125
Dict & getDict()
Definition: classify.h:65
int UNICHAR_ID
Definition: unichar.h:33
void * alloc_struct(inT32 count, const char *)
Definition: memry.cpp:39
#define MakeConfigPermanent(Class, ConfigId)
Definition: adaptive.h:96
UNICHAR_ID * Ambigs
Definition: adaptive.h:52
void FreeTempConfig(TEMP_CONFIG Config)
Definition: adaptive.cpp:80
void tesseract::Classify::MasterMatcher ( INT_TEMPLATES  templates,
inT16  num_features,
const INT_FEATURE_STRUCT features,
const uinT8 norm_factors,
ADAPT_CLASS classes,
int  debug,
int  matcher_multiplier,
const TBOX blob_box,
const GenericVector< CP_RESULT_STRUCT > &  results,
ADAPT_RESULTS final_results 
)

Factored-out calls to IntegerMatcher based on class pruner results. For each class in results, the integer matcher result is passed to ExpandShapesAndApplyCorrections together with final_results; the function itself returns void.

Definition at line 1132 of file adaptmatch.cpp.

1141  {
1142  int top = blob_box.top();
1143  int bottom = blob_box.bottom();
1144  UnicharRating int_result;
1145  for (int c = 0; c < results.size(); c++) {
1146  CLASS_ID class_id = results[c].Class;
1147  BIT_VECTOR protos = classes != NULL ? classes[class_id]->PermProtos
1148  : AllProtosOn;
1149  BIT_VECTOR configs = classes != NULL ? classes[class_id]->PermConfigs
1150  : AllConfigsOn;
1151 
1152  int_result.unichar_id = class_id;
1153  im_.Match(ClassForClassId(templates, class_id),
1154  protos, configs,
1155  num_features, features,
1156  &int_result, classify_adapt_feature_threshold, debug,
1158  bool debug = matcher_debug_level >= 2 || classify_debug_level > 1;
1159  ExpandShapesAndApplyCorrections(classes, debug, class_id, bottom, top,
1160  results[c].Rating,
1161  final_results->BlobLength,
1162  matcher_multiplier, norm_factors,
1163  &int_result, final_results);
1164  }
1165 }
bool matcher_debug_separate_windows
Definition: classify.h:458
int size() const
Definition: genericvector.h:72
BIT_VECTOR PermConfigs
Definition: adaptive.h:69
inT32 BlobLength
Definition: adaptmatch.cpp:83
UNICHAR_ID CLASS_ID
Definition: matchdefs.h:35
BIT_VECTOR AllProtosOn
Definition: classify.h:480
void Match(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, inT16 NumFeatures, const INT_FEATURE_STRUCT *Features, tesseract::UnicharRating *Result, int AdaptFeatureThreshold, int Debug, bool SeparateDebugWindows)
Definition: intmatcher.cpp:461
void ExpandShapesAndApplyCorrections(ADAPT_CLASS *classes, bool debug, int class_id, int bottom, int top, float cp_rating, int blob_length, int matcher_multiplier, const uinT8 *cn_factors, UnicharRating *int_result, ADAPT_RESULTS *final_results)
inT16 bottom() const
Definition: rect.h:61
#define ClassForClassId(T, c)
Definition: intproto.h:181
uinT32 * BIT_VECTOR
Definition: bitvec.h:28
IntegerMatcher im_
Definition: classify.h:503
#define NULL
Definition: host.h:144
BIT_VECTOR AllConfigsOn
Definition: classify.h:481
inT16 top() const
Definition: rect.h:54
int classify_adapt_feature_threshold
Definition: classify.h:447
BIT_VECTOR PermProtos
Definition: adaptive.h:68
ADAPT_TEMPLATES tesseract::Classify::NewAdaptedTemplates ( bool  InitFromUnicharset)

Allocates memory for adapted templates. If InitFromUnicharset is true, an empty class is added to the newly created templates for each char in unicharset.

Parameters
InitFromUnicharset: if true, add an empty class for each char in unicharset
Returns
Ptr to new adapted templates.
Note
Globals: none
Exceptions: none
History: Fri Mar 8 10:15:28 1991, DSJ, Created.

Definition at line 167 of file adaptive.cpp.

167  {
168  ADAPT_TEMPLATES Templates;
169  int i;
170 
171  Templates = (ADAPT_TEMPLATES) Emalloc (sizeof (ADAPT_TEMPLATES_STRUCT));
172 
173  Templates->Templates = NewIntTemplates ();
174  Templates->NumPermClasses = 0;
175  Templates->NumNonEmptyClasses = 0;
176 
177  /* Insert an empty class for each unichar id in unicharset */
178  for (i = 0; i < MAX_NUM_CLASSES; i++) {
179  Templates->Class[i] = NULL;
180  if (InitFromUnicharset && i < unicharset.size()) {
181  AddAdaptedClass(Templates, NewAdaptedClass(), i);
182  }
183  }
184 
185  return (Templates);
186 
187 } /* NewAdaptedTemplates */
#define MAX_NUM_CLASSES
Definition: matchdefs.h:31
UNICHARSET unicharset
Definition: ccutil.h:72
ADAPT_CLASS NewAdaptedClass()
Definition: adaptive.cpp:113
ADAPT_CLASS Class[MAX_NUM_CLASSES]
Definition: adaptive.h:81
void AddAdaptedClass(ADAPT_TEMPLATES Templates, ADAPT_CLASS Class, CLASS_ID ClassId)
Definition: adaptive.cpp:49
INT_TEMPLATES NewIntTemplates()
Definition: intproto.cpp:739
ADAPT_TEMPLATES_STRUCT * ADAPT_TEMPLATES
Definition: adaptive.h:83
INT_TEMPLATES Templates
Definition: adaptive.h:77
void * Emalloc(int Size)
Definition: emalloc.cpp:35
#define NULL
Definition: host.h:144
int size() const
Definition: unicharset.h:297
void tesseract::Classify::NormalizeOutlines ( LIST  Outlines,
FLOAT32 XScale,
FLOAT32 YScale 
)

Definition at line 295 of file mfoutline.cpp.

297  {
298 /*
299  ** Parameters:
300  ** Outlines list of outlines to be normalized
301  ** XScale x-direction scale factor used by routine
302  ** YScale y-direction scale factor used by routine
303  ** Globals:
304  ** classify_norm_method method being used for normalization
305  ** classify_char_norm_range map radius of gyration to this value
306  ** Operation: This routine normalizes every outline in Outlines
307  ** according to the currently selected normalization method.
308  ** It also returns the scale factors that it used to do this
309  ** scaling. The scale factors returned represent the x and
310  ** y sizes in the normalized coordinate system that correspond
311  ** to 1 pixel in the original coordinate system.
312  ** Return: none (Outlines are changed and XScale and YScale are updated)
313  ** Exceptions: none
314  ** History: Fri Dec 14 08:14:55 1990, DSJ, Created.
315  */
316  MFOUTLINE Outline;
317 
318  switch (classify_norm_method) {
319  case character:
320  ASSERT_HOST(!"How did NormalizeOutlines get called in character mode?");
321  break;
322 
323  case baseline:
324  iterate(Outlines) {
325  Outline = (MFOUTLINE) first_node(Outlines);
326  NormalizeOutline(Outline, 0.0);
327  }
328  *XScale = *YScale = MF_SCALE_FACTOR;
329  break;
330  }
331 } /* NormalizeOutlines */
#define first_node(l)
Definition: oldlist.h:139
#define iterate(l)
Definition: oldlist.h:159
#define ASSERT_HOST(x)
Definition: errcode.h:84
void NormalizeOutline(MFOUTLINE Outline, FLOAT32 XOrigin)
Definition: mfoutline.cpp:260
LIST MFOUTLINE
Definition: mfoutline.h:33
#define MF_SCALE_FACTOR
Definition: mfoutline.h:63
void tesseract::Classify::PrintAdaptedTemplates ( FILE *  File,
ADAPT_TEMPLATES  Templates 
)

This routine prints a summary of the adapted templates in Templates to File.

Parameters
File: open text file to print Templates to
Templates: adapted templates to print to File
Note
Globals: none
Exceptions: none
History: Wed Mar 20 13:35:29 1991, DSJ, Created.

Definition at line 273 of file adaptive.cpp.

273  {
274  int i;
275  INT_CLASS IClass;
276  ADAPT_CLASS AClass;
277 
278  fprintf (File, "\n\nSUMMARY OF ADAPTED TEMPLATES:\n\n");
279  fprintf (File, "Num classes = %d; Num permanent classes = %d\n\n",
280  Templates->NumNonEmptyClasses, Templates->NumPermClasses);
281  fprintf (File, " Id NC NPC NP NPP\n");
282  fprintf (File, "------------------------\n");
283 
284  for (i = 0; i < (Templates->Templates)->NumClasses; i++) {
285  IClass = Templates->Templates->Class[i];
286  AClass = Templates->Class[i];
287  if (!IsEmptyAdaptedClass (AClass)) {
288  fprintf (File, "%5d %s %3d %3d %3d %3d\n",
290  IClass->NumConfigs, AClass->NumPermConfigs,
291  IClass->NumProtos,
292  IClass->NumProtos - count (AClass->TempProtos));
293  }
294  }
295  fprintf (File, "\n");
296 
297 } /* PrintAdaptedTemplates */
INT_CLASS Class[MAX_NUM_CLASSES]
Definition: intproto.h:124
UNICHARSET unicharset
Definition: ccutil.h:72
#define IsEmptyAdaptedClass(Class)
Definition: adaptive.h:90
int count(LIST var_list)
Definition: oldlist.cpp:108
ADAPT_CLASS Class[MAX_NUM_CLASSES]
Definition: adaptive.h:81
uinT8 NumPermConfigs
Definition: adaptive.h:65
const char *const id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:266
INT_TEMPLATES Templates
Definition: adaptive.h:77
uinT8 NumConfigs
Definition: intproto.h:110
uinT16 NumProtos
Definition: intproto.h:108
void tesseract::Classify::PrintAdaptiveMatchResults ( const ADAPT_RESULTS results)

This routine prints the matches in results using tprintf.

Parameters
results: match results to print (the File parameter named in the original comment is not part of the current signature; output goes through tprintf)

Globals: none

Note
Exceptions: none
History: Mon Mar 18 09:24:53 1991, DSJ, Created.

Definition at line 2083 of file adaptmatch.cpp.

2083  {
2084  for (int i = 0; i < results.match.size(); ++i) {
2085  tprintf("%s ", unicharset.debug_str(results.match[i].unichar_id).string());
2086  results.match[i].Print();
2087  }
2088 } /* PrintAdaptiveMatchResults */
STRING debug_str(UNICHAR_ID id) const
Definition: unicharset.cpp:318
int size() const
Definition: genericvector.h:72
GenericVector< UnicharRating > match
Definition: adaptmatch.cpp:88
#define tprintf(...)
Definition: tprintf.h:31
UNICHARSET unicharset
Definition: ccutil.h:72
const char * string() const
Definition: strngs.cpp:193
int tesseract::Classify::PruneClasses ( const INT_TEMPLATES_STRUCT int_templates,
int  num_features,
int  keep_this,
const INT_FEATURE_STRUCT features,
const uinT8 normalization_factors,
const uinT16 expected_num_features,
GenericVector< CP_RESULT_STRUCT > *  results 
)

Definition at line 407 of file intmatcher.cpp.

412  {
413 /*
414  ** Operation:
415  ** Prunes the classes using a modified fast match table.
416  ** Returns a sorted list of classes along with the number
417  ** of pruned classes in that list.
418  ** Return: Number of pruned classes.
419  ** Exceptions: none
420  ** History: Tue Feb 19 10:24:24 MST 1991, RWM, Created.
421  */
422  ClassPruner pruner(int_templates->NumClasses);
423  // Compute initial match scores for all classes.
424  pruner.ComputeScores(int_templates, num_features, features);
425  // Adjust match scores for number of expected features.
426  pruner.AdjustForExpectedNumFeatures(expected_num_features,
428  // Apply disabled classes in unicharset - only works without a shape_table.
429  if (shape_table_ == NULL)
430  pruner.DisableDisabledClasses(unicharset);
431  // If fragments are disabled, remove them, also only without a shape table.
433  pruner.DisableFragments(unicharset);
434 
435  // If we have good x-heights, apply the given normalization factors.
436  if (normalization_factors != NULL) {
437  pruner.NormalizeForXheight(classify_class_pruner_multiplier,
438  normalization_factors);
439  } else {
440  pruner.NoNormalization();
441  }
442  // Do the actual pruning and sort the short-list.
443  pruner.PruneAndSort(classify_class_pruner_threshold, keep_this,
445 
446  if (classify_debug_level > 2) {
447  pruner.DebugMatch(*this, int_templates, features);
448  }
449  if (classify_debug_level > 1) {
450  pruner.SummarizeResult(*this, int_templates, expected_num_features,
452  normalization_factors);
453  }
454  // Convert to the expected output format.
455  return pruner.SetupResults(results);
456 }
UNICHARSET unicharset
Definition: ccutil.h:72
ShapeTable * shape_table_
Definition: classify.h:512
int classify_class_pruner_multiplier
Definition: classify.h:465
int classify_class_pruner_threshold
Definition: classify.h:463
bool disable_character_fragments
Definition: classify.h:450
#define NULL
Definition: host.h:144
int classify_cp_cutoff_strength
Definition: classify.h:467
ADAPT_TEMPLATES tesseract::Classify::ReadAdaptedTemplates ( FILE *  File)

Read a set of adapted templates from File and return a ptr to the templates.

Parameters
File: open file to read adapted templates from
Returns
Ptr to adapted templates read from File.
Note
Globals: none
Exceptions: none
History: Mon Mar 18 15:18:10 1991, DSJ, Created.

Definition at line 369 of file adaptive.cpp.

369  {
370  int i;
371  ADAPT_TEMPLATES Templates;
372 
373  /* first read the high level adaptive template struct */
374  Templates = (ADAPT_TEMPLATES) Emalloc (sizeof (ADAPT_TEMPLATES_STRUCT));
375  fread ((char *) Templates, sizeof (ADAPT_TEMPLATES_STRUCT), 1, File);
376 
377  /* then read in the basic integer templates */
378  Templates->Templates = ReadIntTemplates (File);
379 
380  /* then read in the adaptive info for each class */
381  for (i = 0; i < (Templates->Templates)->NumClasses; i++) {
382  Templates->Class[i] = ReadAdaptedClass (File);
383  }
384  return (Templates);
385 
386 } /* ReadAdaptedTemplates */
ADAPT_CLASS Class[MAX_NUM_CLASSES]
Definition: adaptive.h:81
INT_TEMPLATES ReadIntTemplates(FILE *File)
Definition: intproto.cpp:776
ADAPT_TEMPLATES_STRUCT * ADAPT_TEMPLATES
Definition: adaptive.h:83
ADAPT_CLASS ReadAdaptedClass(FILE *File)
Definition: adaptive.cpp:313
INT_TEMPLATES Templates
Definition: adaptive.h:77
void * Emalloc(int Size)
Definition: emalloc.cpp:35
INT_TEMPLATES tesseract::Classify::ReadIntTemplates ( FILE *  File)

Definition at line 776 of file intproto.cpp.

776  {
777 /*
778  ** Parameters:
779  ** File open file to read templates from
780  ** Globals: none
781  ** Operation: This routine reads a set of integer templates from
782  ** File. File must already be open and must be in the
783  ** correct binary format.
784  ** Return: Pointer to integer templates read from File.
785  ** Exceptions: none
786  ** History: Wed Feb 27 11:48:46 1991, DSJ, Created.
787  */
788  int i, j, w, x, y, z;
789  BOOL8 swap;
790  int nread;
791  int unicharset_size;
792  int version_id = 0;
793  INT_TEMPLATES Templates;
794  CLASS_PRUNER_STRUCT* Pruner;
795  INT_CLASS Class;
796  uinT8 *Lengths;
797  PROTO_SET ProtoSet;
798 
799  /* variables for conversion from older inttemp formats */
800  int b, bit_number, last_cp_bit_number, new_b, new_i, new_w;
801  CLASS_ID class_id, max_class_id;
802  inT16 *IndexFor = new inT16[MAX_NUM_CLASSES];
803  CLASS_ID *ClassIdFor = new CLASS_ID[MAX_NUM_CLASSES];
804  CLASS_PRUNER_STRUCT **TempClassPruner =
806  uinT32 SetBitsForMask = // word with NUM_BITS_PER_CLASS
807  (1 << NUM_BITS_PER_CLASS) - 1; // set starting at bit 0
808  uinT32 Mask, NewMask, ClassBits;
809  int MaxNumConfigs = MAX_NUM_CONFIGS;
810  int WerdsPerConfigVec = WERDS_PER_CONFIG_VEC;
811 
812  /* first read the high level template struct */
813  Templates = NewIntTemplates();
814  // Read Templates in parts for 64 bit compatibility.
815  if (fread(&unicharset_size, sizeof(int), 1, File) != 1)
816  cprintf("Bad read of inttemp!\n");
817  if (fread(&Templates->NumClasses,
818  sizeof(Templates->NumClasses), 1, File) != 1 ||
819  fread(&Templates->NumClassPruners,
820  sizeof(Templates->NumClassPruners), 1, File) != 1)
821  cprintf("Bad read of inttemp!\n");
822  // Swap status is determined automatically.
823  swap = Templates->NumClassPruners < 0 ||
825  if (swap) {
826  Reverse32(&Templates->NumClassPruners);
827  Reverse32(&Templates->NumClasses);
828  Reverse32(&unicharset_size);
829  }
830  if (Templates->NumClasses < 0) {
831  // This file has a version id!
832  version_id = -Templates->NumClasses;
833  if (fread(&Templates->NumClasses, sizeof(Templates->NumClasses),
834  1, File) != 1)
835  cprintf("Bad read of inttemp!\n");
836  if (swap)
837  Reverse32(&Templates->NumClasses);
838  }
839 
840  if (version_id < 3) {
841  MaxNumConfigs = OLD_MAX_NUM_CONFIGS;
842  WerdsPerConfigVec = OLD_WERDS_PER_CONFIG_VEC;
843  }
844 
845  if (version_id < 2) {
846  for (i = 0; i < unicharset_size; ++i) {
847  if (fread(&IndexFor[i], sizeof(inT16), 1, File) != 1)
848  cprintf("Bad read of inttemp!\n");
849  }
850  for (i = 0; i < Templates->NumClasses; ++i) {
851  if (fread(&ClassIdFor[i], sizeof(CLASS_ID), 1, File) != 1)
852  cprintf("Bad read of inttemp!\n");
853  }
854  if (swap) {
855  for (i = 0; i < Templates->NumClasses; i++)
856  Reverse16(&IndexFor[i]);
857  for (i = 0; i < Templates->NumClasses; i++)
858  Reverse32(&ClassIdFor[i]);
859  }
860  }
861 
862  /* then read in the class pruners */
863  for (i = 0; i < Templates->NumClassPruners; i++) {
864  Pruner = new CLASS_PRUNER_STRUCT;
865  if ((nread =
866  fread(Pruner, 1, sizeof(CLASS_PRUNER_STRUCT),
867  File)) != sizeof(CLASS_PRUNER_STRUCT))
868  cprintf("Bad read of inttemp!\n");
869  if (swap) {
870  for (x = 0; x < NUM_CP_BUCKETS; x++) {
871  for (y = 0; y < NUM_CP_BUCKETS; y++) {
872  for (z = 0; z < NUM_CP_BUCKETS; z++) {
873  for (w = 0; w < WERDS_PER_CP_VECTOR; w++) {
874  Reverse32(&Pruner->p[x][y][z][w]);
875  }
876  }
877  }
878  }
879  }
880  if (version_id < 2) {
881  TempClassPruner[i] = Pruner;
882  } else {
883  Templates->ClassPruners[i] = Pruner;
884  }
885  }
886 
887  /* fix class pruners if they came from an old version of inttemp */
888  if (version_id < 2) {
889  // Allocate enough class pruners to cover all the class ids.
890  max_class_id = 0;
891  for (i = 0; i < Templates->NumClasses; i++)
892  if (ClassIdFor[i] > max_class_id)
893  max_class_id = ClassIdFor[i];
894  for (i = 0; i <= CPrunerIdFor(max_class_id); i++) {
895  Templates->ClassPruners[i] = new CLASS_PRUNER_STRUCT;
896  memset(Templates->ClassPruners[i], 0, sizeof(CLASS_PRUNER_STRUCT));
897  }
898  // Convert class pruners from the old format (indexed by class index)
899  // to the new format (indexed by class id).
900  last_cp_bit_number = NUM_BITS_PER_CLASS * Templates->NumClasses - 1;
901  for (i = 0; i < Templates->NumClassPruners; i++) {
902  for (x = 0; x < NUM_CP_BUCKETS; x++)
903  for (y = 0; y < NUM_CP_BUCKETS; y++)
904  for (z = 0; z < NUM_CP_BUCKETS; z++)
905  for (w = 0; w < WERDS_PER_CP_VECTOR; w++) {
906  if (TempClassPruner[i]->p[x][y][z][w] == 0)
907  continue;
908  for (b = 0; b < BITS_PER_WERD; b += NUM_BITS_PER_CLASS) {
909  bit_number = i * BITS_PER_CP_VECTOR + w * BITS_PER_WERD + b;
910  if (bit_number > last_cp_bit_number)
911  break; // the rest of the bits in this word are not used
912  class_id = ClassIdFor[bit_number / NUM_BITS_PER_CLASS];
913  // Single out NUM_BITS_PER_CLASS bits relating to class_id.
914  Mask = SetBitsForMask << b;
915  ClassBits = TempClassPruner[i]->p[x][y][z][w] & Mask;
916  // Move these bits to the new position in which they should
917  // appear (indexed corresponding to the class_id).
918  new_i = CPrunerIdFor(class_id);
919  new_w = CPrunerWordIndexFor(class_id);
920  new_b = CPrunerBitIndexFor(class_id) * NUM_BITS_PER_CLASS;
921  if (new_b > b) {
922  ClassBits <<= (new_b - b);
923  } else {
924  ClassBits >>= (b - new_b);
925  }
926  // Copy bits relating to class_id to the correct position
927  // in Templates->ClassPruner.
928  NewMask = SetBitsForMask << new_b;
929  Templates->ClassPruners[new_i]->p[x][y][z][new_w] &= ~NewMask;
930  Templates->ClassPruners[new_i]->p[x][y][z][new_w] |= ClassBits;
931  }
932  }
933  }
934  for (i = 0; i < Templates->NumClassPruners; i++) {
935  delete TempClassPruner[i];
936  }
937  }
938 
939  /* then read in each class */
940  for (i = 0; i < Templates->NumClasses; i++) {
941  /* first read in the high level struct for the class */
942  Class = (INT_CLASS) Emalloc (sizeof (INT_CLASS_STRUCT));
943  if (fread(&Class->NumProtos, sizeof(Class->NumProtos), 1, File) != 1 ||
944  fread(&Class->NumProtoSets, sizeof(Class->NumProtoSets), 1, File) != 1 ||
945  fread(&Class->NumConfigs, sizeof(Class->NumConfigs), 1, File) != 1)
946  cprintf ("Bad read of inttemp!\n");
947  if (version_id == 0) {
948  // Only version 0 writes 5 pointless pointers to the file.
949  for (j = 0; j < 5; ++j) {
950  int junk;
951  if (fread(&junk, sizeof(junk), 1, File) != 1)
952  cprintf ("Bad read of inttemp!\n");
953  }
954  }
955  if (version_id < 4) {
956  for (j = 0; j < MaxNumConfigs; ++j) {
957  if (fread(&Class->ConfigLengths[j], sizeof(uinT16), 1, File) != 1)
958  cprintf ("Bad read of inttemp!\n");
959  }
960  if (swap) {
961  Reverse16(&Class->NumProtos);
962  for (j = 0; j < MaxNumConfigs; j++)
963  Reverse16(&Class->ConfigLengths[j]);
964  }
965  } else {
966  ASSERT_HOST(Class->NumConfigs < MaxNumConfigs);
967  for (j = 0; j < Class->NumConfigs; ++j) {
968  if (fread(&Class->ConfigLengths[j], sizeof(uinT16), 1, File) != 1)
969  cprintf ("Bad read of inttemp!\n");
970  }
971  if (swap) {
972  Reverse16(&Class->NumProtos);
973  for (j = 0; j < MaxNumConfigs; j++)
974  Reverse16(&Class->ConfigLengths[j]);
975  }
976  }
977  if (version_id < 2) {
978  ClassForClassId (Templates, ClassIdFor[i]) = Class;
979  } else {
980  ClassForClassId (Templates, i) = Class;
981  }
982 
983  /* then read in the proto lengths */
984  Lengths = NULL;
985  if (MaxNumIntProtosIn (Class) > 0) {
986  Lengths = (uinT8 *)Emalloc(sizeof(uinT8) * MaxNumIntProtosIn(Class));
987  if ((nread =
988  fread((char *)Lengths, sizeof(uinT8),
989  MaxNumIntProtosIn(Class), File)) != MaxNumIntProtosIn (Class))
990  cprintf ("Bad read of inttemp!\n");
991  }
992  Class->ProtoLengths = Lengths;
993 
994  /* then read in the proto sets */
995  for (j = 0; j < Class->NumProtoSets; j++) {
996  ProtoSet = (PROTO_SET)Emalloc(sizeof(PROTO_SET_STRUCT));
997  if (version_id < 3) {
998  if ((nread =
999  fread((char *) &ProtoSet->ProtoPruner, 1,
1000  sizeof(PROTO_PRUNER), File)) != sizeof(PROTO_PRUNER))
1001  cprintf("Bad read of inttemp!\n");
1002  for (x = 0; x < PROTOS_PER_PROTO_SET; x++) {
1003  if ((nread = fread((char *) &ProtoSet->Protos[x].A, 1,
1004  sizeof(inT8), File)) != sizeof(inT8) ||
1005  (nread = fread((char *) &ProtoSet->Protos[x].B, 1,
1006  sizeof(uinT8), File)) != sizeof(uinT8) ||
1007  (nread = fread((char *) &ProtoSet->Protos[x].C, 1,
1008  sizeof(inT8), File)) != sizeof(inT8) ||
1009  (nread = fread((char *) &ProtoSet->Protos[x].Angle, 1,
1010  sizeof(uinT8), File)) != sizeof(uinT8))
1011  cprintf("Bad read of inttemp!\n");
1012  for (y = 0; y < WerdsPerConfigVec; y++)
1013  if ((nread = fread((char *) &ProtoSet->Protos[x].Configs[y], 1,
1014  sizeof(uinT32), File)) != sizeof(uinT32))
1015  cprintf("Bad read of inttemp!\n");
1016  }
1017  } else {
1018  if ((nread =
1019  fread((char *) ProtoSet, 1, sizeof(PROTO_SET_STRUCT),
1020  File)) != sizeof(PROTO_SET_STRUCT))
1021  cprintf("Bad read of inttemp!\n");
1022  }
1023  if (swap) {
1024  for (x = 0; x < NUM_PP_PARAMS; x++)
1025  for (y = 0; y < NUM_PP_BUCKETS; y++)
1026  for (z = 0; z < WERDS_PER_PP_VECTOR; z++)
1027  Reverse32(&ProtoSet->ProtoPruner[x][y][z]);
1028  for (x = 0; x < PROTOS_PER_PROTO_SET; x++)
1029  for (y = 0; y < WerdsPerConfigVec; y++)
1030  Reverse32(&ProtoSet->Protos[x].Configs[y]);
1031  }
1032  Class->ProtoSets[j] = ProtoSet;
1033  }
1034  if (version_id < 4)
1035  Class->font_set_id = -1;
1036  else {
1037  fread(&Class->font_set_id, sizeof(int), 1, File);
1038  if (swap)
1039  Reverse32(&Class->font_set_id);
1040  }
1041  }
1042 
1043  if (version_id < 2) {
1044  /* add an empty NULL class with class id 0 */
1045  assert(UnusedClassIdIn (Templates, 0));
1046  ClassForClassId (Templates, 0) = NewIntClass (1, 1);
1047  ClassForClassId (Templates, 0)->font_set_id = -1;
1048  Templates->NumClasses++;
1049  /* make sure the classes are contiguous */
1050  for (i = 0; i < MAX_NUM_CLASSES; i++) {
1051  if (i < Templates->NumClasses) {
1052  if (ClassForClassId (Templates, i) == NULL) {
1053  fprintf(stderr, "Non-contiguous class ids in inttemp\n");
1054  exit(1);
1055  }
1056  } else {
1057  if (ClassForClassId (Templates, i) != NULL) {
1058  fprintf(stderr, "Class id %d exceeds NumClassesIn (Templates) %d\n",
1059  i, Templates->NumClasses);
1060  exit(1);
1061  }
1062  }
1063  }
1064  }
1065  if (version_id >= 4) {
1066  this->fontinfo_table_.read(File, NewPermanentTessCallback(read_info), swap);
1067  if (version_id >= 5) {
1068  this->fontinfo_table_.read(File,
1070  swap);
1071  }
1072  this->fontset_table_.read(File, NewPermanentTessCallback(read_set), swap);
1073  }
1074 
1075  // Clean up.
1076  delete[] IndexFor;
1077  delete[] ClassIdFor;
1078  delete[] TempClassPruner;
1079 
1080  return (Templates);
1081 } /* ReadIntTemplates */
#define NUM_CP_BUCKETS
Definition: intproto.h:52
CLASS_PRUNER_STRUCT * ClassPruners[MAX_NUM_CLASS_PRUNERS]
Definition: intproto.h:125
#define MaxNumIntProtosIn(C)
Definition: intproto.h:168
#define WERDS_PER_PP_VECTOR
Definition: intproto.h:62
INT_PROTO_STRUCT Protos[PROTOS_PER_PROTO_SET]
Definition: intproto.h:97
#define MAX_NUM_CONFIGS
Definition: intproto.h:46
#define MAX_NUM_CLASSES
Definition: matchdefs.h:31
struct INT_CLASS_STRUCT * INT_CLASS
void Reverse32(void *ptr)
Definition: helpers.h:193
bool read_set(FILE *f, FontSet *fs, bool swap)
Definition: fontinfo.cpp:240
unsigned char BOOL8
Definition: host.h:113
uinT32 PROTO_PRUNER[NUM_PP_PARAMS][NUM_PP_BUCKETS][WERDS_PER_PP_VECTOR]
Definition: intproto.h:92
#define CPrunerIdFor(c)
Definition: intproto.h:183
UNICHAR_ID CLASS_ID
Definition: matchdefs.h:35
#define CPrunerWordIndexFor(c)
Definition: intproto.h:185
INT_CLASS NewIntClass(int MaxNumProtos, int MaxNumConfigs)
Definition: intproto.cpp:673
UnicityTable< FontInfo > fontinfo_table_
Definition: classify.h:488
uinT16 ConfigLengths[MAX_NUM_CONFIGS]
Definition: intproto.h:113
#define ASSERT_HOST(x)
Definition: errcode.h:84
#define WERDS_PER_CP_VECTOR
Definition: intproto.h:61
PROTO_SET ProtoSets[MAX_NUM_PROTO_SETS]
Definition: intproto.h:111
#define MAX_NUM_CLASS_PRUNERS
Definition: intproto.h:59
#define BITS_PER_WERD
Definition: intproto.h:44
unsigned int uinT32
Definition: host.h:103
INT_TEMPLATES NewIntTemplates()
Definition: intproto.cpp:739
#define OLD_WERDS_PER_CONFIG_VEC
Definition: intproto.cpp:115
_ConstTessMemberResultCallback_0_0< false, R, T1 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)() const)
Definition: tesscallback.h:116
#define NUM_PP_PARAMS
Definition: intproto.h:50
void Reverse16(void *ptr)
Definition: helpers.h:188
#define NUM_PP_BUCKETS
Definition: intproto.h:51
#define UnusedClassIdIn(T, c)
Definition: intproto.h:180
#define ClassForClassId(T, c)
Definition: intproto.h:181
void * Emalloc(int Size)
Definition: emalloc.cpp:35
#define CPrunerBitIndexFor(c)
Definition: intproto.h:186
uinT8 NumProtoSets
Definition: intproto.h:109
PROTO_PRUNER ProtoPruner
Definition: intproto.h:96
#define PROTOS_PER_PROTO_SET
Definition: intproto.h:48
#define BITS_PER_CP_VECTOR
Definition: intproto.h:58
#define NUM_BITS_PER_CLASS
Definition: intproto.h:54
uinT8 NumConfigs
Definition: intproto.h:110
UnicityTable< FontSet > fontset_table_
Definition: classify.h:496
uinT32 p[NUM_CP_BUCKETS][NUM_CP_BUCKETS][NUM_CP_BUCKETS][WERDS_PER_CP_VECTOR]
Definition: intproto.h:77
bool read_spacing_info(FILE *f, FontInfo *fi, bool swap)
Definition: fontinfo.cpp:177
#define NULL
Definition: host.h:144
SIGNED char inT8
Definition: host.h:98
bool read_info(FILE *f, FontInfo *fi, bool swap)
Definition: fontinfo.cpp:152
void cprintf(const char *format,...)
Definition: callcpp.cpp:40
uinT8 * ProtoLengths
Definition: intproto.h:112
#define WERDS_PER_CONFIG_VEC
Definition: intproto.h:68
uinT16 NumProtos
Definition: intproto.h:108
struct PROTO_SET_STRUCT * PROTO_SET
uinT32 Configs[WERDS_PER_CONFIG_VEC]
Definition: intproto.h:86
unsigned short uinT16
Definition: host.h:101
short inT16
Definition: host.h:100
unsigned char uinT8
Definition: host.h:99
#define OLD_MAX_NUM_CONFIGS
Definition: intproto.cpp:114
void tesseract::Classify::ReadNewCutoffs ( FILE *  CutoffFile,
bool  swap,
inT64  end_offset,
CLASS_CUTOFF_ARRAY  Cutoffs 
)

Definition at line 42 of file cutoffs.cpp.

43  {
44 /*
45  ** Parameters:
46  ** Filename name of file containing cutoff definitions
47  ** Cutoffs array to put cutoffs into
48  ** Globals: none
49  ** Operation: Open Filename, read in all of the class-id/cutoff pairs
50  ** and insert them into the Cutoffs array. Cutoffs are
51  ** indexed in the array by class id. Unused entries in the
52  ** array are set to an arbitrarily high cutoff value.
53  ** Return: none
54  ** Exceptions: none
55  ** History: Wed Feb 20 09:38:26 1991, DSJ, Created.
56  */
57  char Class[UNICHAR_LEN + 1];
58  CLASS_ID ClassId;
59  int Cutoff;
60  int i;
61 
62  if (shape_table_ != NULL) {
63  if (!shapetable_cutoffs_.DeSerialize(swap, CutoffFile)) {
64  tprintf("Error during read of shapetable pffmtable!\n");
65  }
66  }
67  for (i = 0; i < MAX_NUM_CLASSES; i++)
68  Cutoffs[i] = MAX_CUTOFF;
69 
70  while ((end_offset < 0 || ftell(CutoffFile) < end_offset) &&
71  tfscanf(CutoffFile, "%" REALLY_QUOTE_IT(UNICHAR_LEN) "s %d",
72  Class, &Cutoff) == 2) {
73  if (strcmp(Class, "NULL") == 0) {
74  ClassId = unicharset.unichar_to_id(" ");
75  } else {
76  ClassId = unicharset.unichar_to_id(Class);
77  }
78  Cutoffs[ClassId] = Cutoff;
79  SkipNewline(CutoffFile);
80  }
81 } /* ReadNewCutoffs */
const UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:194
#define MAX_NUM_CLASSES
Definition: matchdefs.h:31
#define tprintf(...)
Definition: tprintf.h:31
UNICHARSET unicharset
Definition: ccutil.h:72
int tfscanf(FILE *stream, const char *format,...)
Definition: scanutils.cpp:229
UNICHAR_ID CLASS_ID
Definition: matchdefs.h:35
#define REALLY_QUOTE_IT(x)
Definition: cutoffs.cpp:33
ShapeTable * shape_table_
Definition: classify.h:512
bool DeSerialize(bool swap, FILE *fp)
void SkipNewline(FILE *file)
Definition: helpers.h:84
#define MAX_CUTOFF
Definition: cutoffs.cpp:35
#define NULL
Definition: host.h:144
#define UNICHAR_LEN
Definition: unichar.h:30
NORM_PROTOS * tesseract::Classify::ReadNormProtos ( FILE *  File,
inT64  end_offset 
)

Definition at line 234 of file normmatch.cpp.

234  {
235 /*
236  ** Parameters:
237  ** File open text file to read normalization protos from
238  ** Globals: none
239  ** Operation: This routine allocates a new data structure to hold
240  ** a set of character normalization protos. It then fills in
241  ** the data structure by reading from the specified File.
242  ** Return: Character normalization protos.
243  ** Exceptions: none
244  ** History: Wed Dec 19 16:38:49 1990, DSJ, Created.
245  */
247  int i;
248  char unichar[2 * UNICHAR_LEN + 1];
249  UNICHAR_ID unichar_id;
250  LIST Protos;
251  int NumProtos;
252 
253  /* allocate and initialization data structure */
254  NormProtos = (NORM_PROTOS *) Emalloc (sizeof (NORM_PROTOS));
255  NormProtos->NumProtos = unicharset.size();
256  NormProtos->Protos = (LIST *) Emalloc (NormProtos->NumProtos * sizeof(LIST));
257  for (i = 0; i < NormProtos->NumProtos; i++)
258  NormProtos->Protos[i] = NIL_LIST;
259 
260  /* read file header and save in data structure */
261  NormProtos->NumParams = ReadSampleSize (File);
262  NormProtos->ParamDesc = ReadParamDesc (File, NormProtos->NumParams);
263 
264  /* read protos for each class into a separate list */
265  while ((end_offset < 0 || ftell(File) < end_offset) &&
266  tfscanf(File, "%s %d", unichar, &NumProtos) == 2) {
267  if (unicharset.contains_unichar(unichar)) {
268  unichar_id = unicharset.unichar_to_id(unichar);
269  Protos = NormProtos->Protos[unichar_id];
270  for (i = 0; i < NumProtos; i++)
271  Protos =
272  push_last (Protos, ReadPrototype (File, NormProtos->NumParams));
273  NormProtos->Protos[unichar_id] = Protos;
274  } else {
275  cprintf("Error: unichar %s in normproto file is not in unichar set.\n",
276  unichar);
277  for (i = 0; i < NumProtos; i++)
278  FreePrototype(ReadPrototype (File, NormProtos->NumParams));
279  }
280  SkipNewline(File);
281  }
282  return (NormProtos);
283 } /* ReadNormProtos */
const UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:194
PARAM_DESC * ParamDesc
Definition: normmatch.cpp:41
UNICHARSET unicharset
Definition: ccutil.h:72
int tfscanf(FILE *stream, const char *format,...)
Definition: scanutils.cpp:229
LIST push_last(LIST list, void *item)
Definition: oldlist.cpp:338
uinT16 ReadSampleSize(FILE *File)
Definition: clusttool.cpp:46
LIST * Protos
Definition: normmatch.cpp:42
void SkipNewline(FILE *file)
Definition: helpers.h:84
int UNICHAR_ID
Definition: unichar.h:33
void * Emalloc(int Size)
Definition: emalloc.cpp:35
#define NIL_LIST
Definition: oldlist.h:126
bool contains_unichar(const char *const unichar_repr) const
Definition: unicharset.cpp:644
void FreePrototype(void *arg)
Definition: cluster.cpp:579
#define UNICHAR_LEN
Definition: unichar.h:30
void cprintf(const char *format,...)
Definition: callcpp.cpp:40
int size() const
Definition: unicharset.h:297
PARAM_DESC * ReadParamDesc(FILE *File, uinT16 N)
Definition: clusttool.cpp:68
PROTOTYPE * ReadPrototype(FILE *File, uinT16 N)
Definition: clusttool.cpp:114
NORM_PROTOS * NormProtos
Definition: classify.h:486
void tesseract::Classify::RefreshDebugWindow ( ScrollView **  win,
const char *  msg,
int  y_offset,
const TBOX wbox 
)

Definition at line 220 of file adaptmatch.cpp.

221  {
222  #ifndef GRAPHICS_DISABLED
223  const int kSampleSpaceWidth = 500;
224  if (*win == NULL) {
225  *win = new ScrollView(msg, 100, y_offset, kSampleSpaceWidth * 2, 200,
226  kSampleSpaceWidth * 2, 200, true);
227  }
228  (*win)->Clear();
229  (*win)->Pen(64, 64, 64);
230  (*win)->Line(-kSampleSpaceWidth, kBlnBaselineOffset,
231  kSampleSpaceWidth, kBlnBaselineOffset);
232  (*win)->Line(-kSampleSpaceWidth, kBlnXHeight + kBlnBaselineOffset,
233  kSampleSpaceWidth, kBlnXHeight + kBlnBaselineOffset);
234  (*win)->ZoomToRectangle(wbox.left(), wbox.top(),
235  wbox.right(), wbox.bottom());
236  #endif // GRAPHICS_DISABLED
237 }
const int kBlnXHeight
Definition: normalis.h:28
inT16 right() const
Definition: rect.h:75
inT16 left() const
Definition: rect.h:68
const int kBlnBaselineOffset
Definition: normalis.h:29
inT16 bottom() const
Definition: rect.h:61
#define NULL
Definition: host.h:144
inT16 top() const
Definition: rect.h:54
void tesseract::Classify::RemoveBadMatches ( ADAPT_RESULTS Results)

This routine steps through each matching class in Results and removes it from the match list if its rating is worse than the best rating by more than a pad (matcher_bad_match_pad), i.e. if its rating falls below best_rating minus the pad. In other words, all good matches get moved to the front of the match array.

Parameters
Results: contains matches to be filtered

Globals:

  • matcher_bad_match_pad defines a "bad match"
Note
Exceptions: none
History: Tue Mar 12 13:51:03 1991, DSJ, Created.

Definition at line 2106 of file adaptmatch.cpp.

2106  {
2107  int Next, NextGood;
2108  FLOAT32 BadMatchThreshold;
2109  static const char* romans = "i v x I V X";
2110  BadMatchThreshold = Results->best_rating - matcher_bad_match_pad;
2111 
2113  UNICHAR_ID unichar_id_one = unicharset.contains_unichar("1") ?
2114  unicharset.unichar_to_id("1") : -1;
2115  UNICHAR_ID unichar_id_zero = unicharset.contains_unichar("0") ?
2116  unicharset.unichar_to_id("0") : -1;
2117  float scored_one = ScoredUnichar(unichar_id_one, *Results);
2118  float scored_zero = ScoredUnichar(unichar_id_zero, *Results);
2119 
2120  for (Next = NextGood = 0; Next < Results->match.size(); Next++) {
2121  const UnicharRating& match = Results->match[Next];
2122  if (match.rating >= BadMatchThreshold) {
2123  if (!unicharset.get_isalpha(match.unichar_id) ||
2124  strstr(romans,
2125  unicharset.id_to_unichar(match.unichar_id)) != NULL) {
2126  } else if (unicharset.eq(match.unichar_id, "l") &&
2127  scored_one < BadMatchThreshold) {
2128  Results->match[Next].unichar_id = unichar_id_one;
2129  } else if (unicharset.eq(match.unichar_id, "O") &&
2130  scored_zero < BadMatchThreshold) {
2131  Results->match[Next].unichar_id = unichar_id_zero;
2132  } else {
2133  Results->match[Next].unichar_id = INVALID_UNICHAR_ID; // Don't copy.
2134  }
2135  if (Results->match[Next].unichar_id != INVALID_UNICHAR_ID) {
2136  if (NextGood == Next) {
2137  ++NextGood;
2138  } else {
2139  Results->match[NextGood++] = Results->match[Next];
2140  }
2141  }
2142  }
2143  }
2144  } else {
2145  for (Next = NextGood = 0; Next < Results->match.size(); Next++) {
2146  if (Results->match[Next].rating >= BadMatchThreshold) {
2147  if (NextGood == Next) {
2148  ++NextGood;
2149  } else {
2150  Results->match[NextGood++] = Results->match[Next];
2151  }
2152  }
2153  }
2154  }
2155  Results->match.truncate(NextGood);
2156 } /* RemoveBadMatches */
int size() const
Definition: genericvector.h:72
void truncate(int size)
bool classify_bln_numeric_mode
Definition: classify.h:500
const UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:194
float FLOAT32
Definition: host.h:111
bool eq(UNICHAR_ID unichar_id, const char *const unichar_repr) const
Definition: unicharset.cpp:656
GenericVector< UnicharRating > match
Definition: adaptmatch.cpp:88
UNICHARSET unicharset
Definition: ccutil.h:72
const char *const id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:266
FLOAT32 best_rating
Definition: adaptmatch.cpp:87
int UNICHAR_ID
Definition: unichar.h:33
double matcher_bad_match_pad
Definition: classify.h:423
bool get_isalpha(UNICHAR_ID unichar_id) const
Definition: unicharset.h:449
bool contains_unichar(const char *const unichar_repr) const
Definition: unicharset.cpp:644
#define NULL
Definition: host.h:144
void tesseract::Classify::RemoveExtraPuncs ( ADAPT_RESULTS Results)

This routine discards extra digits or punctuation from the results. We keep only the top 2 punctuation answers and the top 1 digit answer if present.

Parameters
Results: contains matches to be filtered
Note
History: Tue Mar 12 13:51:03 1991, DSJ, Created.

Definition at line 2168 of file adaptmatch.cpp.

2168  {
2169  int Next, NextGood;
2170  int punc_count; /*no of garbage characters */
2171  int digit_count;
2172  /*garbage characters */
2173  static char punc_chars[] = ". , ; : / ` ~ ' - = \\ | \" ! _ ^";
2174  static char digit_chars[] = "0 1 2 3 4 5 6 7 8 9";
2175 
2176  punc_count = 0;
2177  digit_count = 0;
2178  for (Next = NextGood = 0; Next < Results->match.size(); Next++) {
2179  const UnicharRating& match = Results->match[Next];
2180  bool keep = true;
2181  if (strstr(punc_chars,
2182  unicharset.id_to_unichar(match.unichar_id)) != NULL) {
2183  if (punc_count >= 2)
2184  keep = false;
2185  punc_count++;
2186  } else {
2187  if (strstr(digit_chars,
2188  unicharset.id_to_unichar(match.unichar_id)) != NULL) {
2189  if (digit_count >= 1)
2190  keep = false;
2191  digit_count++;
2192  }
2193  }
2194  if (keep) {
2195  if (NextGood == Next) {
2196  ++NextGood;
2197  } else {
2198  Results->match[NextGood++] = match;
2199  }
2200  }
2201  }
2202  Results->match.truncate(NextGood);
2203 } /* RemoveExtraPuncs */
int size() const
Definition: genericvector.h:72
void truncate(int size)
GenericVector< UnicharRating > match
Definition: adaptmatch.cpp:88
UNICHARSET unicharset
Definition: ccutil.h:72
const char *const id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:266
#define NULL
Definition: host.h:144
void tesseract::Classify::ResetAdaptiveClassifierInternal ( )

Definition at line 613 of file adaptmatch.cpp.

613  {
615  tprintf("Resetting adaptive classifier (NumAdaptationsFailed=%d)\n",
616  NumAdaptationsFailed);
617  }
623  NumAdaptationsFailed = 0;
624 }
ADAPT_TEMPLATES BackupAdaptedTemplates
Definition: classify.h:477
ADAPT_TEMPLATES NewAdaptedTemplates(bool InitFromUnicharset)
Definition: adaptive.cpp:167
#define tprintf(...)
Definition: tprintf.h:31
int classify_learning_debug_level
Definition: classify.h:419
void free_adapted_templates(ADAPT_TEMPLATES templates)
Definition: adaptive.cpp:199
#define NULL
Definition: host.h:144
ADAPT_TEMPLATES AdaptedTemplates
Definition: classify.h:473
void tesseract::Classify::SetAdaptiveThreshold ( FLOAT32  Threshold)

This routine resets the internal thresholds inside the integer matcher to correspond to the specified threshold.

Parameters
Threshold: threshold for creating new templates

Globals:

  • matcher_good_threshold default good match rating
Note
Exceptions: none
History: Tue Apr 9 08:33:13 1991, DSJ, Created.

Definition at line 2219 of file adaptmatch.cpp.

2219  {
2220  Threshold = (Threshold == matcher_good_threshold) ? 0.9: (1.0 - Threshold);
2222  ClipToRange<int>(255 * Threshold, 0, 255));
2224  ClipToRange<int>(255 * Threshold, 0, 255));
2225 } /* SetAdaptiveThreshold */
double matcher_good_threshold
Definition: classify.h:420
int classify_adapt_proto_threshold
Definition: classify.h:445
int classify_adapt_feature_threshold
Definition: classify.h:447
void tesseract::Classify::SetStaticClassifier ( ShapeClassifier static_classifier)

Definition at line 204 of file classify.cpp.

204  {
205  delete static_classifier_;
206  static_classifier_ = static_classifier;
207 }
void tesseract::Classify::SettupPass1 ( )

This routine prepares the adaptive matcher for the start of the first pass. Learning is enabled (unless it is disabled for the whole program).

Note
This is somewhat redundant: it simply says that if learning is enabled then it will remain enabled on the first pass, and if it is disabled then it will remain disabled. It is only put here to make it very clear that learning is controlled directly by the global setting of EnableLearning.

Globals:

Note
Exceptions: none
History: Mon Apr 15 16:39:29 1991, DSJ, Created.

Definition at line 670 of file adaptmatch.cpp.

670  {
672 
674 
675 } /* SettupPass1 */
void SettupStopperPass1()
Sets up stopper variables in preparation for the first pass.
Definition: stopper.cpp:370
Dict & getDict()
Definition: classify.h:65
bool classify_enable_learning
Definition: classify.h:389
void tesseract::Classify::SettupPass2 ( )

This routine prepares the adaptive matcher for the start of the second pass. Further learning is disabled.

Globals:

Note
Exceptions: none
History: Mon Apr 15 16:39:29 1991, DSJ, Created.

Definition at line 690 of file adaptmatch.cpp.

690  {
693 
694 } /* SettupPass2 */
Dict & getDict()
Definition: classify.h:65
void SettupStopperPass2()
Sets up stopper variables in preparation for the second pass.
Definition: stopper.cpp:374
#define FALSE
Definition: capi.h:29
void tesseract::Classify::SetupBLCNDenorms ( const TBLOB blob,
bool  nonlinear_norm,
DENORM bl_denorm,
DENORM cn_denorm,
INT_FX_RESULT_STRUCT fx_info 
)
static

Definition at line 133 of file intfx.cpp.

135  {
136  // Compute 1st and 2nd moments of the original outline.
137  FCOORD center, second_moments;
138  int length = blob.ComputeMoments(&center, &second_moments);
139  if (fx_info != NULL) {
140  fx_info->Length = length;
141  fx_info->Rx = IntCastRounded(second_moments.y());
142  fx_info->Ry = IntCastRounded(second_moments.x());
143 
144  fx_info->Xmean = IntCastRounded(center.x());
145  fx_info->Ymean = IntCastRounded(center.y());
146  }
147  // Setup the denorm for Baseline normalization.
148  bl_denorm->SetupNormalization(NULL, NULL, &blob.denorm(), center.x(), 128.0f,
149  1.0f, 1.0f, 128.0f, 128.0f);
150  // Setup the denorm for character normalization.
151  if (nonlinear_norm) {
154  TBOX box;
155  blob.GetPreciseBoundingBox(&box);
156  box.pad(1, 1);
157  blob.GetEdgeCoords(box, &x_coords, &y_coords);
158  cn_denorm->SetupNonLinear(&blob.denorm(), box, MAX_UINT8, MAX_UINT8,
159  0.0f, 0.0f, x_coords, y_coords);
160  } else {
161  cn_denorm->SetupNormalization(NULL, NULL, &blob.denorm(),
162  center.x(), center.y(),
163  51.2f / second_moments.x(),
164  51.2f / second_moments.y(),
165  128.0f, 128.0f);
166  }
167 }
float x() const
Definition: points.h:209
void SetupNonLinear(const DENORM *predecessor, const TBOX &box, float target_width, float target_height, float final_xshift, float final_yshift, const GenericVector< GenericVector< int > > &x_coords, const GenericVector< GenericVector< int > > &y_coords)
Definition: normalis.cpp:267
int ComputeMoments(FCOORD *center, FCOORD *second_moments) const
Definition: blobs.cpp:535
void SetupNormalization(const BLOCK *block, const FCOORD *rotation, const DENORM *predecessor, float x_origin, float y_origin, float x_scale, float y_scale, float final_xshift, float final_yshift)
Definition: normalis.cpp:95
void pad(int xpad, int ypad)
Definition: rect.h:127
void GetPreciseBoundingBox(TBOX *precise_box) const
Definition: blobs.cpp:554
#define MAX_UINT8
Definition: host.h:121
const DENORM & denorm() const
Definition: blobs.h:340
void GetEdgeCoords(const TBOX &box, GenericVector< GenericVector< int > > *x_coords, GenericVector< GenericVector< int > > *y_coords) const
Definition: blobs.cpp:570
int IntCastRounded(double x)
Definition: helpers.h:172
Definition: rect.h:30
float y() const
Definition: points.h:212
#define NULL
Definition: host.h:144
Definition: points.h:189
const ShapeTable* tesseract::Classify::shape_table ( ) const
inline

Definition at line 69 of file classify.h.

69  {
70  return shape_table_;
71  }
ShapeTable * shape_table_
Definition: classify.h:512
int tesseract::Classify::ShapeIDToClassID ( int  shape_id) const

Definition at line 2303 of file adaptmatch.cpp.

2303  {
2304  for (int id = 0; id < PreTrainedTemplates->NumClasses; ++id) {
2305  int font_set_id = PreTrainedTemplates->Class[id]->font_set_id;
2306  ASSERT_HOST(font_set_id >= 0);
2307  const FontSet &fs = fontset_table_.get(font_set_id);
2308  for (int config = 0; config < fs.size; ++config) {
2309  if (fs.configs[config] == shape_id)
2310  return id;
2311  }
2312  }
2313  tprintf("Shape %d not found\n", shape_id);
2314  return -1;
2315 }
INT_CLASS Class[MAX_NUM_CLASSES]
Definition: intproto.h:124
#define tprintf(...)
Definition: tprintf.h:31
INT_TEMPLATES PreTrainedTemplates
Definition: classify.h:469
#define ASSERT_HOST(x)
Definition: errcode.h:84
UnicityTable< FontSet > fontset_table_
Definition: classify.h:496
void tesseract::Classify::ShowBestMatchFor ( int  shape_id,
const INT_FEATURE_STRUCT * features,
int  num_features 
)

This routine displays debug information for the best config of the given shape_id for the given set of features.

Parameters
shape_id: classifier id to work with
features: features of the unknown character
num_features: number of features in the features array
Note
Exceptions: none
History: Fri Mar 22 08:43:52 1991, DSJ, Created.

Definition at line 2240 of file adaptmatch.cpp.

2242  {
2243 #ifndef GRAPHICS_DISABLED
2244  uinT32 config_mask;
2245  if (UnusedClassIdIn(PreTrainedTemplates, shape_id)) {
2246  tprintf("No built-in templates for class/shape %d\n", shape_id);
2247  return;
2248  }
2249  if (num_features <= 0) {
2250  tprintf("Illegal blob (char norm features)!\n");
2251  return;
2252  }
2253  UnicharRating cn_result;
2254  classify_norm_method.set_value(character);
2257  num_features, features, &cn_result,
2260  tprintf("\n");
2261  config_mask = 1 << cn_result.config;
2262 
2263  tprintf("Static Shape ID: %d\n", shape_id);
2264  ShowMatchDisplay();
2266  AllProtosOn, reinterpret_cast<BIT_VECTOR>(&config_mask),
2267  num_features, features, &cn_result,
2272 #endif // GRAPHICS_DISABLED
2273 } /* ShowBestMatchFor */
bool matcher_debug_separate_windows
Definition: classify.h:458
#define tprintf(...)
Definition: tprintf.h:31
INT_TEMPLATES PreTrainedTemplates
Definition: classify.h:469
BIT_VECTOR AllProtosOn
Definition: classify.h:480
unsigned int uinT32
Definition: host.h:103
void Match(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, inT16 NumFeatures, const INT_FEATURE_STRUCT *Features, tesseract::UnicharRating *Result, int AdaptFeatureThreshold, int Debug, bool SeparateDebugWindows)
Definition: intmatcher.cpp:461
#define NO_DEBUG
Definition: adaptmatch.cpp:70
void UpdateMatchDisplay()
Definition: intproto.cpp:466
#define UnusedClassIdIn(T, c)
Definition: intproto.h:180
#define ClassForClassId(T, c)
Definition: intproto.h:181
IntegerMatcher im_
Definition: classify.h:503
BIT_VECTOR AllConfigsOn
Definition: classify.h:481
int classify_adapt_feature_threshold
Definition: classify.h:447
void tesseract::Classify::ShowMatchDisplay ( )

Definition at line 1086 of file intproto.cpp.

1086  {
1087 /*
1088  ** Parameters: none
1089  ** Globals:
1090  ** FeatureShapes display list containing feature matches
1091  ** ProtoShapes display list containing proto matches
1092  ** Operation: This routine sends the shapes in the global display
1093  ** lists to the match debugger window.
1094  ** Return: none
1095  ** Exceptions: none
1096  ** History: Thu Mar 21 15:47:33 1991, DSJ, Created.
1097  */
1099  if (ProtoDisplayWindow) {
1101  }
1102  if (FeatureDisplayWindow) {
1104  }
1106  static_cast<NORM_METHOD>(static_cast<int>(classify_norm_method)),
1107  IntMatchWindow);
1109  INT_MAX_X, INT_MAX_Y);
1110  if (ProtoDisplayWindow) {
1112  INT_MAX_X, INT_MAX_Y);
1113  }
1114  if (FeatureDisplayWindow) {
1116  INT_MAX_X, INT_MAX_Y);
1117  }
1118 } /* ShowMatchDisplay */
void ClearFeatureSpaceWindow(NORM_METHOD norm_method, ScrollView *window)
Definition: intproto.cpp:1122
#define INT_MIN_Y
Definition: intproto.cpp:66
void InitIntMatchWindowIfReqd()
Definition: intproto.cpp:1935
ScrollView * ProtoDisplayWindow
Definition: intproto.cpp:183
#define INT_MAX_X
Definition: intproto.cpp:67
void Clear()
Definition: scrollview.cpp:595
ScrollView * IntMatchWindow
Definition: intproto.cpp:181
void ZoomToRectangle(int x1, int y1, int x2, int y2)
Definition: scrollview.cpp:765
#define INT_MAX_Y
Definition: intproto.cpp:68
#define INT_MIN_X
Definition: intproto.cpp:65
ScrollView * FeatureDisplayWindow
Definition: intproto.cpp:182
void tesseract::Classify::StartBackupAdaptiveClassifier ( )

Definition at line 644 of file adaptmatch.cpp.

644  {
648 }
ADAPT_TEMPLATES BackupAdaptedTemplates
Definition: classify.h:477
ADAPT_TEMPLATES NewAdaptedTemplates(bool InitFromUnicharset)
Definition: adaptive.cpp:167
void free_adapted_templates(ADAPT_TEMPLATES templates)
Definition: adaptive.cpp:199
#define NULL
Definition: host.h:144
void tesseract::Classify::SwitchAdaptiveClassifier ( )

Definition at line 628 of file adaptmatch.cpp.

628  {
629  if (BackupAdaptedTemplates == NULL) {
631  return;
632  }
634  tprintf("Switch to backup adaptive classifier (NumAdaptationsFailed=%d)\n",
635  NumAdaptationsFailed);
636  }
640  NumAdaptationsFailed = 0;
641 }
ADAPT_TEMPLATES BackupAdaptedTemplates
Definition: classify.h:477
#define tprintf(...)
Definition: tprintf.h:31
int classify_learning_debug_level
Definition: classify.h:419
void free_adapted_templates(ADAPT_TEMPLATES templates)
Definition: adaptive.cpp:199
void ResetAdaptiveClassifierInternal()
Definition: adaptmatch.cpp:613
#define NULL
Definition: host.h:144
ADAPT_TEMPLATES AdaptedTemplates
Definition: classify.h:473
bool tesseract::Classify::TempConfigReliable ( CLASS_ID  class_id,
const TEMP_CONFIG & config 
)

Definition at line 2319 of file adaptmatch.cpp.

2320  {
2321  if (classify_learning_debug_level >= 1) {
2322  tprintf("NumTimesSeen for config of %s is %d\n",
2323  getDict().getUnicharset().debug_str(class_id).string(),
2324  config->NumTimesSeen);
2325  }
2327  return true;
2328  } else if (config->NumTimesSeen < matcher_min_examples_for_prototyping) {
2329  return false;
2330  } else if (use_ambigs_for_adaption) {
2331  // Go through the ambigs vector and see whether we have already seen
2332  // enough times all the characters represented by the ambigs vector.
2333  const UnicharIdVector *ambigs =
2335  int ambigs_size = (ambigs == NULL) ? 0 : ambigs->size();
2336  for (int ambig = 0; ambig < ambigs_size; ++ambig) {
2337  ADAPT_CLASS ambig_class = AdaptedTemplates->Class[(*ambigs)[ambig]];
2338  assert(ambig_class != NULL);
2339  if (ambig_class->NumPermConfigs == 0 &&
2340  ambig_class->MaxNumTimesSeen <
2342  if (classify_learning_debug_level >= 1) {
2343  tprintf("Ambig %s has not been seen enough times,"
2344  " not making config for %s permanent\n",
2345  getDict().getUnicharset().debug_str(
2346  (*ambigs)[ambig]).string(),
2347  getDict().getUnicharset().debug_str(class_id).string());
2348  }
2349  return false;
2350  }
2351  }
2352  }
2353  return true;
2354 }
int size() const
Definition: genericvector.h:72
GenericVector< UNICHAR_ID > UnicharIdVector
Definition: ambigs.h:34
#define tprintf(...)
Definition: tprintf.h:31
int matcher_min_examples_for_prototyping
Definition: classify.h:428
int classify_learning_debug_level
Definition: classify.h:419
ADAPT_CLASS Class[MAX_NUM_CLASSES]
Definition: adaptive.h:81
uinT8 NumPermConfigs
Definition: adaptive.h:65
uinT8 MaxNumTimesSeen
Definition: adaptive.h:66
const UnicharAmbigs & getUnicharAmbigs() const
Definition: dict.h:102
Dict & getDict()
Definition: classify.h:65
uinT8 NumTimesSeen
Definition: adaptive.h:41
int matcher_sufficient_examples_for_prototyping
Definition: classify.h:430
const UnicharIdVector * AmbigsForAdaption(UNICHAR_ID unichar_id) const
Definition: ambigs.h:191
#define NULL
Definition: host.h:144
bool use_ambigs_for_adaption
Definition: ccutil.h:93
ADAPT_TEMPLATES AdaptedTemplates
Definition: classify.h:473
void tesseract::Classify::UpdateAmbigsGroup ( CLASS_ID  class_id,
TBLOB * Blob 
)

Definition at line 2356 of file adaptmatch.cpp.

2356  {
2357  const UnicharIdVector *ambigs =
2359  int ambigs_size = (ambigs == NULL) ? 0 : ambigs->size();
2360  if (classify_learning_debug_level >= 1) {
2361  tprintf("Running UpdateAmbigsGroup for %s class_id=%d\n",
2362  getDict().getUnicharset().debug_str(class_id).string(), class_id);
2363  }
2364  for (int ambig = 0; ambig < ambigs_size; ++ambig) {
2365  CLASS_ID ambig_class_id = (*ambigs)[ambig];
2366  const ADAPT_CLASS ambigs_class = AdaptedTemplates->Class[ambig_class_id];
2367  for (int cfg = 0; cfg < MAX_NUM_CONFIGS; ++cfg) {
2368  if (ConfigIsPermanent(ambigs_class, cfg)) continue;
2369  const TEMP_CONFIG config =
2370  TempConfigFor(AdaptedTemplates->Class[ambig_class_id], cfg);
2371  if (config != NULL && TempConfigReliable(ambig_class_id, config)) {
2372  if (classify_learning_debug_level >= 1) {
2373  tprintf("Making config %d of %s permanent\n", cfg,
2374  getDict().getUnicharset().debug_str(
2375  ambig_class_id).string());
2376  }
2377  MakePermanent(AdaptedTemplates, ambig_class_id, cfg, Blob);
2378  }
2379  }
2380  }
2381 }
int size() const
Definition: genericvector.h:72
GenericVector< UNICHAR_ID > UnicharIdVector
Definition: ambigs.h:34
#define MAX_NUM_CONFIGS
Definition: intproto.h:46
#define tprintf(...)
Definition: tprintf.h:31
void MakePermanent(ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int ConfigId, TBLOB *Blob)
UNICHAR_ID CLASS_ID
Definition: matchdefs.h:35
#define ConfigIsPermanent(Class, ConfigId)
Definition: adaptive.h:93
int classify_learning_debug_level
Definition: classify.h:419
ADAPT_CLASS Class[MAX_NUM_CLASSES]
Definition: adaptive.h:81
const UnicharAmbigs & getUnicharAmbigs() const
Definition: dict.h:102
#define TempConfigFor(Class, ConfigId)
Definition: adaptive.h:102
Dict & getDict()
Definition: classify.h:65
const UnicharIdVector * ReverseAmbigsForAdaption(UNICHAR_ID unichar_id) const
Definition: ambigs.h:200
bool TempConfigReliable(CLASS_ID class_id, const TEMP_CONFIG &config)
#define NULL
Definition: host.h:144
ADAPT_TEMPLATES AdaptedTemplates
Definition: classify.h:473
void tesseract::Classify::WriteAdaptedTemplates ( FILE *  File,
ADAPT_TEMPLATES  Templates 
)

This routine saves Templates to File in a binary format.

Parameters
File: open file to write Templates to (the data is written in binary form)
Templates: set of adapted templates to write to File
Note
Globals: none
Exceptions: none
History: Mon Mar 18 15:07:32 1991, DSJ, Created.

Definition at line 505 of file adaptive.cpp.

505  {
506  int i;
507 
508  /* first write the high level adaptive template struct */
509  fwrite ((char *) Templates, sizeof (ADAPT_TEMPLATES_STRUCT), 1, File);
510 
511  /* then write out the basic integer templates */
512  WriteIntTemplates (File, Templates->Templates, unicharset);
513 
514  /* then write out the adaptive info for each class */
515  for (i = 0; i < (Templates->Templates)->NumClasses; i++) {
516  WriteAdaptedClass (File, Templates->Class[i],
517  Templates->Templates->Class[i]->NumConfigs);
518  }
519 } /* WriteAdaptedTemplates */
INT_CLASS Class[MAX_NUM_CLASSES]
Definition: intproto.h:124
UNICHARSET unicharset
Definition: ccutil.h:72
ADAPT_CLASS Class[MAX_NUM_CLASSES]
Definition: adaptive.h:81
void WriteAdaptedClass(FILE *File, ADAPT_CLASS Class, int NumConfigs)
Definition: adaptive.cpp:459
INT_TEMPLATES Templates
Definition: adaptive.h:77
uinT8 NumConfigs
Definition: intproto.h:110
void WriteIntTemplates(FILE *File, INT_TEMPLATES Templates, const UNICHARSET &target_unicharset)
Definition: intproto.cpp:1145
void tesseract::Classify::WriteIntTemplates ( FILE *  File,
INT_TEMPLATES  Templates,
const UNICHARSET & target_unicharset 
)

Definition at line 1145 of file intproto.cpp.

1146  {
1147 /*
1148  ** Parameters:
1149  ** File open file to write templates to
1150  ** Templates templates to save into File
1151  ** Globals: none
1152  ** Operation: This routine writes Templates to File. The format
1153  ** is an efficient binary format. File must already be open
1154  ** for writing.
1155  ** Return: none
1156  ** Exceptions: none
1157  ** History: Wed Feb 27 11:48:46 1991, DSJ, Created.
1158  */
1159  int i, j;
1160  INT_CLASS Class;
1161  int unicharset_size = target_unicharset.size();
1162  int version_id = -5; // When negated by the reader -1 becomes +1 etc.
1163 
1164  if (Templates->NumClasses != unicharset_size) {
1165  cprintf("Warning: executing WriteIntTemplates() with %d classes in"
1166  " Templates, while target_unicharset size is %d\n",
1167  Templates->NumClasses, unicharset_size);
1168  }
1169 
1170  /* first write the high level template struct */
1171  fwrite(&unicharset_size, sizeof(unicharset_size), 1, File);
1172  fwrite(&version_id, sizeof(version_id), 1, File);
1173  fwrite(&Templates->NumClassPruners, sizeof(Templates->NumClassPruners),
1174  1, File);
1175  fwrite(&Templates->NumClasses, sizeof(Templates->NumClasses), 1, File);
1176 
1177  /* then write out the class pruners */
1178  for (i = 0; i < Templates->NumClassPruners; i++)
1179  fwrite(Templates->ClassPruners[i],
1180  sizeof(CLASS_PRUNER_STRUCT), 1, File);
1181 
1182  /* then write out each class */
1183  for (i = 0; i < Templates->NumClasses; i++) {
1184  Class = Templates->Class[i];
1185 
1186  /* first write out the high level struct for the class */
1187  fwrite(&Class->NumProtos, sizeof(Class->NumProtos), 1, File);
1188  fwrite(&Class->NumProtoSets, sizeof(Class->NumProtoSets), 1, File);
1189  ASSERT_HOST(Class->NumConfigs == this->fontset_table_.get(Class->font_set_id).size);
1190  fwrite(&Class->NumConfigs, sizeof(Class->NumConfigs), 1, File);
1191  for (j = 0; j < Class->NumConfigs; ++j) {
1192  fwrite(&Class->ConfigLengths[j], sizeof(uinT16), 1, File);
1193  }
1194 
1195  /* then write out the proto lengths */
1196  if (MaxNumIntProtosIn (Class) > 0) {
1197  fwrite ((char *) (Class->ProtoLengths), sizeof (uinT8),
1198  MaxNumIntProtosIn (Class), File);
1199  }
1200 
1201  /* then write out the proto sets */
1202  for (j = 0; j < Class->NumProtoSets; j++)
1203  fwrite ((char *) Class->ProtoSets[j],
1204  sizeof (PROTO_SET_STRUCT), 1, File);
1205 
1206  /* then write the fonts info */
1207  fwrite(&Class->font_set_id, sizeof(int), 1, File);
1208  }
1209 
1210  /* Write the fonts info tables */
1212  this->fontinfo_table_.write(File,
1215 } /* WriteIntTemplates */
CLASS_PRUNER_STRUCT * ClassPruners[MAX_NUM_CLASS_PRUNERS]
Definition: intproto.h:125
INT_CLASS Class[MAX_NUM_CLASSES]
Definition: intproto.h:124
#define MaxNumIntProtosIn(C)
Definition: intproto.h:168
UnicityTable< FontInfo > fontinfo_table_
Definition: classify.h:488
bool write_info(FILE *f, const FontInfo &fi)
Definition: fontinfo.cpp:168
uinT16 ConfigLengths[MAX_NUM_CONFIGS]
Definition: intproto.h:113
#define ASSERT_HOST(x)
Definition: errcode.h:84
PROTO_SET ProtoSets[MAX_NUM_PROTO_SETS]
Definition: intproto.h:111
bool write_spacing_info(FILE *f, const FontInfo &fi)
Definition: fontinfo.cpp:211
_ConstTessMemberResultCallback_0_0< false, R, T1 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)() const)
Definition: tesscallback.h:116
uinT8 NumProtoSets
Definition: intproto.h:109
uinT8 NumConfigs
Definition: intproto.h:110
UnicityTable< FontSet > fontset_table_
Definition: classify.h:496
void cprintf(const char *format,...)
Definition: callcpp.cpp:40
uinT8 * ProtoLengths
Definition: intproto.h:112
int size() const
Definition: unicharset.h:297
bool write_set(FILE *f, const FontSet &fs)
Definition: fontinfo.cpp:253
uinT16 NumProtos
Definition: intproto.h:108
unsigned short uinT16
Definition: host.h:101
unsigned char uinT8
Definition: host.h:99
bool tesseract::Classify::WriteTRFile ( const STRING & filename)

Definition at line 97 of file blobclass.cpp.

97  {
98  STRING tr_filename = filename + ".tr";
99  FILE* fp = Efopen(tr_filename.string(), "wb");
100  int len = tr_file_data_.length();
101  bool result =
102  fwrite(&tr_file_data_[0], sizeof(tr_file_data_[0]), len, fp) == len;
103  fclose(fp);
104  tr_file_data_.truncate_at(0);
105  return result;
106 }
inT32 length() const
Definition: strngs.cpp:188
void truncate_at(inT32 index)
Definition: strngs.cpp:264
FILE * Efopen(const char *Name, const char *Mode)
Definition: efio.cpp:32
Definition: strngs.h:44
const char * string() const
Definition: strngs.cpp:193

Member Data Documentation

ADAPT_TEMPLATES tesseract::Classify::AdaptedTemplates

Definition at line 473 of file classify.h.

BIT_VECTOR tesseract::Classify::AllConfigsOff

Definition at line 482 of file classify.h.

BIT_VECTOR tesseract::Classify::AllConfigsOn

Definition at line 481 of file classify.h.

bool tesseract::Classify::allow_blob_division = true

"Use divisible blobs chopping"

Definition at line 382 of file classify.h.

BIT_VECTOR tesseract::Classify::AllProtosOn

Definition at line 480 of file classify.h.

ADAPT_TEMPLATES tesseract::Classify::BackupAdaptedTemplates

Definition at line 477 of file classify.h.

double tesseract::Classify::certainty_scale = 20.0

"Certainty scaling factor"

Definition at line 437 of file classify.h.

int tesseract::Classify::classify_adapt_feature_threshold = 230

"Threshold for good features during adaptive 0-255"

Definition at line 447 of file classify.h.

int tesseract::Classify::classify_adapt_proto_threshold = 230

"Threshold for good protos during adaptive 0-255"

Definition at line 445 of file classify.h.

double tesseract::Classify::classify_adapted_pruning_factor = 2.5

"Prune poor adapted results this much worse than best result"

Definition at line 441 of file classify.h.

double tesseract::Classify::classify_adapted_pruning_threshold = -1.0

"Threshold at which classify_adapted_pruning_factor starts"

Definition at line 443 of file classify.h.

bool tesseract::Classify::classify_bln_numeric_mode = 0

"Assume the input is numbers [0-9]."

Definition at line 500 of file classify.h.

double tesseract::Classify::classify_char_norm_range = 0.2

"Character Normalization Range ..."

Definition at line 396 of file classify.h.

double tesseract::Classify::classify_character_fragments_garbage_certainty_threshold = -3.0

"Exclude fragments that do not match any whole character" " with at least this certainty"

Definition at line 453 of file classify.h.

int tesseract::Classify::classify_class_pruner_multiplier = 15

"Class Pruner Multiplier 0-255: "

Definition at line 465 of file classify.h.

int tesseract::Classify::classify_class_pruner_threshold = 229

"Class Pruner Threshold 0-255"

Definition at line 463 of file classify.h.

int tesseract::Classify::classify_cp_cutoff_strength = 7

"Class Pruner CutoffStrength: "

Definition at line 467 of file classify.h.

bool tesseract::Classify::classify_debug_character_fragments = FALSE

"Bring up graphical debugging windows for fragments training"

Definition at line 455 of file classify.h.

int tesseract::Classify::classify_debug_level = 0

"Classify debug level"

Definition at line 390 of file classify.h.

bool tesseract::Classify::classify_enable_adaptive_debugger = 0

"Enable match debugger"

Definition at line 414 of file classify.h.

bool tesseract::Classify::classify_enable_adaptive_matcher = 1

"Enable adaptive classifier"

Definition at line 409 of file classify.h.

bool tesseract::Classify::classify_enable_learning = true

"Enable adaptive classifier"

Definition at line 389 of file classify.h.

int tesseract::Classify::classify_integer_matcher_multiplier = 10

"Integer Matcher Multiplier 0-255: "

Definition at line 469 of file classify.h.

char* tesseract::Classify::classify_learn_debug_str = ""

"Class str to debug learning"

Definition at line 459 of file classify.h.

int tesseract::Classify::classify_learning_debug_level = 0

"Learning Debug Level: "

Definition at line 419 of file classify.h.

double tesseract::Classify::classify_max_certainty_margin = 5.5

"Veto difference between classifier certainties"

Definition at line 404 of file classify.h.

double tesseract::Classify::classify_max_norm_scale_x = 0.325

"Max char x-norm scale ..."

Definition at line 398 of file classify.h.

double tesseract::Classify::classify_max_norm_scale_y = 0.325

"Max char y-norm scale ..."

Definition at line 400 of file classify.h.

double tesseract::Classify::classify_max_rating_ratio = 1.5

"Veto ratio between classifier ratings"

Definition at line 402 of file classify.h.

double tesseract::Classify::classify_min_norm_scale_x = 0.0

"Min char x-norm scale ..."

Definition at line 397 of file classify.h.

double tesseract::Classify::classify_min_norm_scale_y = 0.0

"Min char y-norm scale ..."

Definition at line 399 of file classify.h.

double tesseract::Classify::classify_misfit_junk_penalty = 0.0

"Penalty to apply when a non-alnum is vertically out of " "its expected textline position"

Definition at line 435 of file classify.h.

bool tesseract::Classify::classify_nonlinear_norm = 0

"Non-linear stroke-density normalization"

Definition at line 416 of file classify.h.

int tesseract::Classify::classify_norm_method = character

"Normalization Method ..."

Definition at line 394 of file classify.h.

bool tesseract::Classify::classify_save_adapted_templates = 0

"Save adapted templates to a file"

Definition at line 413 of file classify.h.

bool tesseract::Classify::classify_use_pre_adapted_templates = 0

"Use pre-adapted classifier templates"

Definition at line 411 of file classify.h.

bool tesseract::Classify::disable_character_fragments = TRUE

"Do not include character fragments in the" " results of the classifier"

Definition at line 450 of file classify.h.

bool tesseract::Classify::EnableLearning

Definition at line 484 of file classify.h.

FEATURE_DEFS_STRUCT tesseract::Classify::feature_defs_
protected

Definition at line 507 of file classify.h.

UnicityTable<FontInfo> tesseract::Classify::fontinfo_table_

Definition at line 488 of file classify.h.

UnicityTable<FontSet> tesseract::Classify::fontset_table_

Definition at line 496 of file classify.h.

int tesseract::Classify::il1_adaption_test = 0

"Dont adapt to i/I at beginning of word"

Definition at line 498 of file classify.h.

IntegerMatcher tesseract::Classify::im_
protected

Definition at line 503 of file classify.h.

double tesseract::Classify::matcher_avg_noise_size = 12.0

"Avg. noise blob length: "

Definition at line 425 of file classify.h.

double tesseract::Classify::matcher_bad_match_pad = 0.15

"Bad Match Pad (0-1)"

Definition at line 423 of file classify.h.

double tesseract::Classify::matcher_clustering_max_angle_delta = 0.015

"Maximum angle delta for prototype clustering"

Definition at line 432 of file classify.h.

int tesseract::Classify::matcher_debug_flags = 0

"Matcher Debug Flags"

Definition at line 418 of file classify.h.

int tesseract::Classify::matcher_debug_level = 0

"Matcher Debug Level"

Definition at line 417 of file classify.h.

bool tesseract::Classify::matcher_debug_separate_windows = FALSE

"Use two different windows for debugging the matching: " "One for the protos and one for the features."

Definition at line 458 of file classify.h.

double tesseract::Classify::matcher_good_threshold = 0.125

"Good Match (0-1)"

Definition at line 420 of file classify.h.

int tesseract::Classify::matcher_min_examples_for_prototyping = 3

"Reliable Config Threshold"

Definition at line 428 of file classify.h.

double tesseract::Classify::matcher_perfect_threshold = 0.02

"Perfect Match (0-1)"

Definition at line 422 of file classify.h.

int tesseract::Classify::matcher_permanent_classes_min = 1

"Min # of permanent classes"

Definition at line 426 of file classify.h.

double tesseract::Classify::matcher_rating_margin = 0.1

"New template margin (0-1)"

Definition at line 424 of file classify.h.

double tesseract::Classify::matcher_reliable_adaptive_result = 0.0

"Great Match (0-1)"

Definition at line 421 of file classify.h.

int tesseract::Classify::matcher_sufficient_examples_for_prototyping = 5

"Enable adaption even if the ambiguities have not been seen"

Definition at line 430 of file classify.h.

NORM_PROTOS* tesseract::Classify::NormProtos

Definition at line 486 of file classify.h.

INT_TEMPLATES tesseract::Classify::PreTrainedTemplates

Definition at line 469 of file classify.h.

bool tesseract::Classify::prioritize_division = FALSE

"Prioritize blob division over chopping"

Definition at line 387 of file classify.h.

double tesseract::Classify::rating_scale = 1.5

"Rating scaling factor"

Definition at line 436 of file classify.h.

ShapeTable* tesseract::Classify::shape_table_
protected

Definition at line 512 of file classify.h.

double tesseract::Classify::speckle_large_max_size = 0.30

"Max large speckle size"

Definition at line 501 of file classify.h.

double tesseract::Classify::speckle_rating_penalty = 10.0

"Penalty to add to worst rating for noise"

Definition at line 503 of file classify.h.

BIT_VECTOR tesseract::Classify::TempProtoMask

Definition at line 483 of file classify.h.

bool tesseract::Classify::tess_bn_matching = 0

"Baseline Normalized Matching"

Definition at line 408 of file classify.h.

bool tesseract::Classify::tess_cn_matching = 0

"Character Normalized Matching"

Definition at line 407 of file classify.h.

double tesseract::Classify::tessedit_class_miss_scale = 0.00390625

"Scale factor for features not used"

Definition at line 439 of file classify.h.

int tesseract::Classify::tessedit_single_match = FALSE

"Top choice only from CP"

Definition at line 388 of file classify.h.


The documentation for this class was generated from the following files: