16 #ifndef USE_STD_NAMESPACE
17 #include "base/init_google.h"
18 #include "base/commandlineflags.h"
20 #include "allheaders.h"
70 #ifndef USE_STD_NAMESPACE
// Short command-line usage text: generic flags followed by .tr files.
// Elsewhere in this file kUsage is passed to InitGoogle() and printed to
// stderr in a "Usage: %s %s" message.
// NOTE(review): this definition directly follows `#ifndef USE_STD_NAMESPACE`,
// so it is presumably the variant used when that macro is NOT defined — confirm.
71 const char*
kUsage =
"[flags] [ .tr files ... ]\n";
// Long command-line usage text: lists the single-letter options one by one
// (-c, -M, -B, -I, -C, -U, -O, -F, -X, -S) followed by .tr files.
// Elsewhere in this file kUsage is printed to stderr in a "Usage: %s %s"
// message.
// NOTE(review): presumably the alternative to the short kUsage above for the
// other USE_STD_NAMESPACE setting — the #else is not visible here; confirm.
// NOTE(review): the option string given to tessopt() also accepts "D:", which
// is not mentioned in the text visible here — verify the usage text is complete.
73 const char*
kUsage =
"[-c configfile]\n"
75 "\t[-M MinSamples] [-B MaxBad] [-I Independence] [-C Confidence]\n"
76 "\t[-U InputUnicharset]\n"
77 "\t[-O OutputUnicharset]\n"
78 "\t[-F FontInfoFile]\n"
79 "\t[-X InputXHeightsFile]\n"
80 "\t[-S InputShapeTable]\n"
81 "\t[ .tr files ... ]\n";
108 #ifndef USE_STD_NAMESPACE
109 InitGoogle(
kUsage, argc, argv,
true);
117 while ((Option =
tessopt(*argc, *argv,
"F:O:U:D:C:I:M:B:S:X:c:")) != EOF) {
121 if ( ParametersRead != 1 ) Error =
TRUE;
127 if ( ParametersRead != 1 ) Error =
TRUE;
133 if ( ParametersRead != 1 ) Error =
TRUE;
139 if ( ParametersRead != 1 ) Error =
TRUE;
166 fprintf(stderr,
"Usage: %s %s\n", (*argv)[0],
kUsage);
172 if (!FLAGS_configfile.empty()) {
174 FLAGS_configfile.c_str(),
185 STRING shape_table_file = file_prefix;
186 shape_table_file += kShapeTableFileSuffix;
187 FILE* shape_fp = fopen(shape_table_file.
string(),
"rb");
188 if (shape_fp !=
NULL) {
193 tprintf(
"Error: Failed to read shape table %s\n",
194 shape_table_file.
string());
196 int num_shapes = shape_table->
NumShapes();
197 tprintf(
"Read shape table %s of %d shapes\n",
198 shape_table_file.
string(), num_shapes);
202 tprintf(
"Warning: No shape table file present: %s\n",
203 shape_table_file.
string());
210 STRING shape_table_file = file_prefix;
211 shape_table_file += kShapeTableFileSuffix;
212 FILE* fp = fopen(shape_table_file.
string(),
"wb");
215 fprintf(stderr,
"Error writing shape table: %s\n",
216 shape_table_file.
string());
220 fprintf(stderr,
"Error creating shape table: %s\n",
221 shape_table_file.
string());
245 if (!FLAGS_D.empty()) {
246 *file_prefix += FLAGS_D.c_str();
253 bool shape_analysis =
false;
254 if (shape_table !=
NULL) {
256 if (*shape_table !=
NULL)
257 shape_analysis =
true;
259 shape_analysis =
true;
265 if (FLAGS_input_trainer.empty()) {
268 if (!FLAGS_F.empty()) {
274 if (!FLAGS_X.empty()) {
283 const char* page_name;
286 tprintf(
"Reading %s ...\n", page_name);
287 FILE* fp =
Efopen(page_name,
"rb");
293 int pagename_len = strlen(page_name);
294 char *fontinfo_file_name =
new char[pagename_len + 7];
295 strncpy(fontinfo_file_name, page_name, pagename_len - 2);
296 strcpy(fontinfo_file_name + pagename_len - 2,
"fontinfo");
298 delete[] fontinfo_file_name;
301 if (FLAGS_load_images) {
302 STRING image_name = page_name;
311 if (!FLAGS_output_trainer.empty()) {
312 FILE* fp = fopen(FLAGS_output_trainer.c_str(),
"wb");
314 tprintf(
"Can't create saved trainer data!\n");
321 bool success =
false;
322 tprintf(
"Loading master trainer from file:%s\n",
323 FLAGS_input_trainer.c_str());
324 FILE* fp = fopen(FLAGS_input_trainer.c_str(),
"rb");
326 tprintf(
"Can't read file %s to initialize master trainer\n",
327 FLAGS_input_trainer.c_str());
333 tprintf(
"Deserialize of master trainer failed!\n");
339 if (!FLAGS_O.empty() &&
341 fprintf(stderr,
"Failed to save unicharset to file %s\n", FLAGS_O.c_str());
345 if (shape_table !=
NULL) {
348 if (*shape_table ==
NULL) {
351 tprintf(
"Flat shape table summary: %s\n",
352 (*shape_table)->SummaryStr().string());
354 (*shape_table)->set_unicharset(trainer->
unicharset());
409 if (strcmp (LabeledList->
Label, Label) == 0)
410 return (LabeledList);
437 strcpy (LabeledList->
Label, Label);
441 return (LabeledList);
449 const char *feature_name,
int max_samples,
451 FILE* file,
LIST* training_samples) {
475 LIST it = *training_samples;
481 while (fgets(buffer, 2048, file) !=
NULL) {
482 if (buffer[0] ==
'\n')
485 sscanf(buffer,
"%*s %s", unichar);
489 tprintf(
"Error: Size of unicharset in training is "
490 "greater than MAX_NUM_CLASSES\n");
494 char_sample =
FindList(*training_samples, unichar);
495 if (char_sample ==
NULL) {
497 *training_samples =
push(*training_samples, char_sample);
500 feature_samples = char_desc->
FeatureSets[feature_type];
502 char_sample->
List =
push(char_sample->
List, feature_samples);
509 if (feature_type != i)
537 FeatureList = char_sample->
List;
562 free(LabeledList->
Label);
569 const char* program_feature_type) {
599 FeatureList = char_sample->
List;
606 for (j = 0; j < N; j++)
612 if ( Sample !=
NULL ) free( Sample );
621 bool debug = strcmp(FLAGS_test_ch.c_str(), label) == 0;
623 LIST pProtoList = ProtoList;
631 LIST list_it = ProtoList;
634 if (test_p != Prototype && !test_p->
Merged) {
638 if (dist < best_dist) {
646 tprintf(
"Merging red clusters (%d+%d) at %g,%g and %g,%g\n",
648 best_match->
Mean[0], best_match->
Mean[1],
649 Prototype->
Mean[0], Prototype->
Mean[1]);
658 }
else if (best_match !=
NULL) {
660 tprintf(
"Red proto at %g,%g matched a green one at %g,%g\n",
661 Prototype->
Mean[0], Prototype->
Mean[1],
662 best_match->
Mean[0], best_match->
Mean[1]);
668 pProtoList = ProtoList;
675 tprintf(
"Red proto at %g,%g becoming green\n",
676 Prototype->
Mean[0], Prototype->
Mean[1]);
713 BOOL8 KeepInsigProtos,
723 pProtoList = ProtoList;
739 for (i=0; i < N; i++)
744 for (i=0; i < N; i++)
753 for (i=0; i < N; i++)
762 for (i=0; i < N; i++)
770 NewProtoList =
push_last(NewProtoList, NewProto);
774 return (NewProtoList);
787 if (strcmp (MergeClass->
Label, Label) == 0)
802 strcpy (MergeClass->
Label, Label);
830 free (MergeClass->
Label);
840 LIST LabeledClassList) {
867 for(i=0; i < NumProtos; i++)
871 Values[0] = OldProto->
X;
872 Values[1] = OldProto->
Y;
873 Values[2] = OldProto->
Angle;
875 NewProto->
X = OldProto->
X;
876 NewProto->
Y = OldProto->
Y;
879 NewProto->
A = Values[0];
880 NewProto->
B = Values[1];
881 NewProto->
C = Values[2];
889 for(i=0; i < NumConfigs; i++)
893 for(j=0; j < NumWords; j++)
894 NewConfig[j] = OldConfig[j];
898 return float_classes;
905 register float Slope;
906 register float Intercept;
907 register float Normalizer;
909 Slope = tan (Values [2] * 2 *
PI);
910 Intercept = Values [1] - Slope * Values [0];
911 Normalizer = 1 / sqrt (Slope * Slope + 1.0);
913 Values [0] = Slope * Normalizer;
914 Values [1] = - Normalizer;
915 Values [2] = Intercept * Normalizer;
947 LabeledProtoList->
List =
push(LabeledProtoList->
List, Proto);
949 *NormProtoList =
push(*NormProtoList, LabeledProtoList);
955 BOOL8 CountSigProtos,
956 BOOL8 CountInsigProtos)