Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
rejctmap.cpp
Go to the documentation of this file.
1 /**********************************************************************
2  * File: rejctmap.cpp (Formerly rejmap.c)
3  * Description: REJ and REJMAP class functions.
4  * Author: Phil Cheatle
5  * Created: Thu Jun 9 13:46:38 BST 1994
6  *
7  * (C) Copyright 1994, Hewlett-Packard Ltd.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 
20 #include "mfcpch.h"
21 #include "hosthplb.h"
22 //#include "basefile.h"
23 #include "rejctmap.h"
24 #include "secname.h"
25 #include "params.h"
26 
// Tests a group of reject flags and reports whether any of them is set; the
// original trailing comment describes this as the "perm reject" test.
// NOTE(review): the embedded listing numbers jump from 33 to 35, so one source
// line (the tail of the return expression) appears to have been dropped from
// this listing. As shown, the expression ends on a dangling `||` and the
// opening parenthesis is never closed - restore the missing line from the
// upstream file before compiling (TODO confirm against upstream).
27 BOOL8 REJ::perm_rejected() { //Is char perm reject?
28  return (flag (R_TESS_FAILURE) ||
29  flag (R_SMALL_XHT) ||
30  flag (R_EDGE_CHAR) ||
31  flag (R_1IL_CONFLICT) ||
32  flag (R_POSTNN_1IL) ||
33  flag (R_REJ_CBLOB) ||
35 }
36 
37 
// Reports whether any of a group of reject flags is set, starting with
// R_POOR_MATCH.
// NOTE(review): the embedded listing numbers jump from 39 to 42, so two source
// lines (the remaining operands of the `||` chain) appear to be missing from
// this listing; the visible expression is incomplete - TODO restore from the
// upstream file.
38 BOOL8 REJ::rej_before_nn_accept() {
39  return flag (R_POOR_MATCH) ||
42 }
43 
44 
// Reports whether any of a group of reject flags is set; the visible operands
// are R_HYPHEN and R_DUBIOUS.
// NOTE(review): the embedded listing numbers jump from 47 to 49, so one source
// line (the end of the `||` chain) appears to be missing from this listing;
// the visible expression is incomplete - TODO restore from the upstream file.
45 BOOL8 REJ::rej_between_nn_and_mm() {
46  return flag (R_HYPHEN) ||
47  flag (R_DUBIOUS) ||
49 }
50 
51 
52 BOOL8 REJ::rej_between_mm_and_quality_accept() {
53  return flag (R_BAD_QUALITY);
54 }
55 
56 
// Reports whether any of a group of reject flags is set, starting with
// R_DOC_REJ.
// NOTE(review): the embedded listing numbers jump from 58 to 60, so one source
// line (the remaining operands of the `||` chain) appears to be missing from
// this listing; the visible expression is incomplete - TODO restore from the
// upstream file.
57 BOOL8 REJ::rej_between_quality_and_minimal_rej_accept() {
58  return flag (R_DOC_REJ) ||
60 }
61 
62 
// True when rej_between_nn_and_mm() holds, or when rej_before_nn_accept()
// holds together with a further condition that is not visible here.
// NOTE(review): the embedded listing numbers jump from 65 to 67, so one source
// line (the right-hand side of the `&&` and the closing parenthesis) appears
// to be missing from this listing - TODO restore from the upstream file.
63 BOOL8 REJ::rej_before_mm_accept() {
64  return rej_between_nn_and_mm () ||
65  (rej_before_nn_accept () &&
67 }
68 
69 
70 BOOL8 REJ::rej_before_quality_accept() {
71  return rej_between_mm_and_quality_accept () ||
72  (!flag (R_MM_ACCEPT) && rej_before_mm_accept ());
73 }
74 
75 
// Decides whether this character counts as rejected (per the original
// trailing comment). The visible `else` branch reports a reject when
// perm_rejected() or rej_between_quality_and_minimal_rej_accept() holds, or
// when R_QUALITY_ACCEPT is not set and rej_before_quality_accept() holds.
// NOTE(review): the embedded listing numbers jump from 76 to 78, so the source
// line carrying the `if (...)` condition that guards `return FALSE` appears to
// be missing from this listing; as shown, the `else` has no matching `if` -
// TODO restore from the upstream file.
76 BOOL8 REJ::rejected() { //Is char rejected?
78  return FALSE;
79  else
80  return (perm_rejected () ||
81  rej_between_quality_and_minimal_rej_accept () ||
82  (!flag (R_QUALITY_ACCEPT) && rej_before_quality_accept ()));
83 }
84 
85 
// Identifies a "potential" reject (per the original trailing comment). The
// visible terms require that the character is rejected() but not
// perm_rejected(), that R_BAD_PERMUTER is set and R_POOR_MATCH is not, and that
// none of rej_between_nn_and_mm(), rej_between_mm_and_quality_accept() and
// rej_between_quality_and_minimal_rej_accept() holds.
// NOTE(review): the embedded listing numbers jump from 90 to 93, so two source
// lines (further `&&` terms) appear to be missing from this listing - TODO
// restore from the upstream file.
86 BOOL8 REJ::accept_if_good_quality() { //potential rej?
87  return (rejected () &&
88  !perm_rejected () &&
89  flag (R_BAD_PERMUTER) &&
90  !flag (R_POOR_MATCH) &&
93  (!rej_between_nn_and_mm () &&
94  !rej_between_mm_and_quality_accept () &&
95  !rej_between_quality_and_minimal_rej_accept ()));
96 }
97 
98 
99 void REJ::setrej_tess_failure() { //Tess generated blank
100  set_flag(R_TESS_FAILURE);
101 }
102 
103 
104 void REJ::setrej_small_xht() { //Small xht char/wd
105  set_flag(R_SMALL_XHT);
106 }
107 
108 
109 void REJ::setrej_edge_char() { //Close to image edge
110  set_flag(R_EDGE_CHAR);
111 }
112 
113 
114 void REJ::setrej_1Il_conflict() { //Initial reject map
115  set_flag(R_1IL_CONFLICT);
116 }
117 
118 
119 void REJ::setrej_postNN_1Il() { //1Il after NN
120  set_flag(R_POSTNN_1IL);
121 }
122 
123 
124 void REJ::setrej_rej_cblob() { //Insert duff blob
125  set_flag(R_REJ_CBLOB);
126 }
127 
128 
129 void REJ::setrej_mm_reject() { //Matrix matcher
130  set_flag(R_MM_REJECT);
131 }
132 
133 
134 void REJ::setrej_bad_repetition() { //Odd repeated char
135  set_flag(R_BAD_REPETITION);
136 }
137 
138 
139 void REJ::setrej_poor_match() { //Failed Rays heuristic
140  set_flag(R_POOR_MATCH);
141 }
142 
143 
// Body of a setter that sets R_NOT_TESS_ACCEPTED via set_flag().
// NOTE(review): the embedded listing numbers jump from 143 to 145, so the
// source line holding this function's signature is missing from this listing.
// REJMAP::rej_word_not_tess_accepted() calls setrej_not_tess_accepted(), so
// this is presumably REJ::setrej_not_tess_accepted() - TODO confirm and
// restore the signature from the upstream file.
145  //TEMP reject_word
146  set_flag(R_NOT_TESS_ACCEPTED);
147 }
148 
149 
// Body of a setter that sets R_CONTAINS_BLANKS via set_flag().
// NOTE(review): the embedded listing numbers jump from 149 to 151, so the
// source line holding this function's signature is missing from this listing.
// REJMAP::rej_word_contains_blanks() calls setrej_contains_blanks(), so this
// is presumably REJ::setrej_contains_blanks() - TODO confirm and restore the
// signature from the upstream file.
151  //TEMP reject_word
152  set_flag(R_CONTAINS_BLANKS);
153 }
154 
155 
156 void REJ::setrej_bad_permuter() { //POTENTIAL reject_word
157  set_flag(R_BAD_PERMUTER);
158 }
159 
160 
161 void REJ::setrej_hyphen() { //PostNN dubious hyphen or .
162  set_flag(R_HYPHEN);
163 }
164 
165 
166 void REJ::setrej_dubious() { //PostNN dubious limit
167  set_flag(R_DUBIOUS);
168 }
169 
170 
171 void REJ::setrej_no_alphanums() { //TEMP reject_word
172  set_flag(R_NO_ALPHANUMS);
173 }
174 
175 
176 void REJ::setrej_mostly_rej() { //TEMP reject_word
177  set_flag(R_MOSTLY_REJ);
178 }
179 
180 
181 void REJ::setrej_xht_fixup() { //xht fixup
182  set_flag(R_XHT_FIXUP);
183 }
184 
185 
186 void REJ::setrej_bad_quality() { //TEMP reject_word
187  set_flag(R_BAD_QUALITY);
188 }
189 
190 
191 void REJ::setrej_doc_rej() { //TEMP reject_word
192  set_flag(R_DOC_REJ);
193 }
194 
195 
196 void REJ::setrej_block_rej() { //TEMP reject_word
197  set_flag(R_BLOCK_REJ);
198 }
199 
200 
201 void REJ::setrej_row_rej() { //TEMP reject_word
202  set_flag(R_ROW_REJ);
203 }
204 
205 
206 void REJ::setrej_unlv_rej() { //TEMP reject_word
207  set_flag(R_UNLV_REJ);
208 }
209 
210 
211 void REJ::setrej_hyphen_accept() { //NN Flipped a char
212  set_flag(R_HYPHEN_ACCEPT);
213 }
214 
215 
216 void REJ::setrej_nn_accept() { //NN Flipped a char
217  set_flag(R_NN_ACCEPT);
218 }
219 
220 
221 void REJ::setrej_mm_accept() { //Matrix matcher
222  set_flag(R_MM_ACCEPT);
223 }
224 
225 
226 void REJ::setrej_quality_accept() { //Quality flip a char
227  set_flag(R_QUALITY_ACCEPT);
228 }
229 
230 
// Body of a setter that sets R_MINIMAL_REJ_ACCEPT via set_flag().
// NOTE(review): the embedded listing numbers jump from 230 to 232, so the
// source line holding this function's signature is missing from this listing.
// By analogy with the neighbouring setters this is presumably
// REJ::setrej_minimal_rej_accept() - TODO confirm and restore the signature
// from the upstream file.
232  //Accept all except blank
233  set_flag(R_MINIMAL_REJ_ACCEPT);
234 }
235 
236 
237 void REJ::full_print(FILE *fp) {
238  #ifndef SECURE_NAMES
239 
240  fprintf (fp, "R_TESS_FAILURE: %s\n", flag (R_TESS_FAILURE) ? "T" : "F");
241  fprintf (fp, "R_SMALL_XHT: %s\n", flag (R_SMALL_XHT) ? "T" : "F");
242  fprintf (fp, "R_EDGE_CHAR: %s\n", flag (R_EDGE_CHAR) ? "T" : "F");
243  fprintf (fp, "R_1IL_CONFLICT: %s\n", flag (R_1IL_CONFLICT) ? "T" : "F");
244  fprintf (fp, "R_POSTNN_1IL: %s\n", flag (R_POSTNN_1IL) ? "T" : "F");
245  fprintf (fp, "R_REJ_CBLOB: %s\n", flag (R_REJ_CBLOB) ? "T" : "F");
246  fprintf (fp, "R_MM_REJECT: %s\n", flag (R_MM_REJECT) ? "T" : "F");
247  fprintf (fp, "R_BAD_REPETITION: %s\n", flag (R_BAD_REPETITION) ? "T" : "F");
248  fprintf (fp, "R_POOR_MATCH: %s\n", flag (R_POOR_MATCH) ? "T" : "F");
249  fprintf (fp, "R_NOT_TESS_ACCEPTED: %s\n",
250  flag (R_NOT_TESS_ACCEPTED) ? "T" : "F");
251  fprintf (fp, "R_CONTAINS_BLANKS: %s\n",
252  flag (R_CONTAINS_BLANKS) ? "T" : "F");
253  fprintf (fp, "R_BAD_PERMUTER: %s\n", flag (R_BAD_PERMUTER) ? "T" : "F");
254  fprintf (fp, "R_HYPHEN: %s\n", flag (R_HYPHEN) ? "T" : "F");
255  fprintf (fp, "R_DUBIOUS: %s\n", flag (R_DUBIOUS) ? "T" : "F");
256  fprintf (fp, "R_NO_ALPHANUMS: %s\n", flag (R_NO_ALPHANUMS) ? "T" : "F");
257  fprintf (fp, "R_MOSTLY_REJ: %s\n", flag (R_MOSTLY_REJ) ? "T" : "F");
258  fprintf (fp, "R_XHT_FIXUP: %s\n", flag (R_XHT_FIXUP) ? "T" : "F");
259  fprintf (fp, "R_BAD_QUALITY: %s\n", flag (R_BAD_QUALITY) ? "T" : "F");
260  fprintf (fp, "R_DOC_REJ: %s\n", flag (R_DOC_REJ) ? "T" : "F");
261  fprintf (fp, "R_BLOCK_REJ: %s\n", flag (R_BLOCK_REJ) ? "T" : "F");
262  fprintf (fp, "R_ROW_REJ: %s\n", flag (R_ROW_REJ) ? "T" : "F");
263  fprintf (fp, "R_UNLV_REJ: %s\n", flag (R_UNLV_REJ) ? "T" : "F");
264  fprintf (fp, "R_HYPHEN_ACCEPT: %s\n", flag (R_HYPHEN_ACCEPT) ? "T" : "F");
265  fprintf (fp, "R_NN_ACCEPT: %s\n", flag (R_NN_ACCEPT) ? "T" : "F");
266  fprintf (fp, "R_MM_ACCEPT: %s\n", flag (R_MM_ACCEPT) ? "T" : "F");
267  fprintf (fp, "R_QUALITY_ACCEPT: %s\n", flag (R_QUALITY_ACCEPT) ? "T" : "F");
268  fprintf (fp, "R_MINIMAL_REJ_ACCEPT: %s\n",
269  flag (R_MINIMAL_REJ_ACCEPT) ? "T" : "F");
270  #endif
271 }
272 
273 
274 //The REJMAP class has been hacked to use alloc_struct instead of new [].
275 //This is to reduce memory fragmentation only as it is rather kludgy.
276 //alloc_struct by-passes the call to the constructor of REJ on each
277 //array element. Although the constructor is empty, the BITS16 members
278 //do have a constructor which sets all the flags to 0. The memset
279 //replaces this functionality.
280 
281 REJMAP::REJMAP( //classwise copy
282  const REJMAP &source) {
283  REJ *to;
284  REJ *from = source.ptr;
285  int i;
286 
287  len = source.length ();
288 
289  if (len > 0) {
290  ptr = (REJ *) alloc_struct (len * sizeof (REJ), "REJ");
291  to = ptr;
292  for (i = 0; i < len; i++) {
293  *to = *from;
294  to++;
295  from++;
296  }
297  }
298  else
299  ptr = NULL;
300 }
301 
302 
303 REJMAP & REJMAP::operator= ( //assign REJMAP
304 const REJMAP & source //from this
305 ) {
306  REJ *
307  to;
308  REJ *
309  from = source.ptr;
310  int
311  i;
312 
313  initialise (source.len);
314  to = ptr;
315  for (i = 0; i < len; i++) {
316  *to = *from;
317  to++;
318  from++;
319  }
320  return *this;
321 }
322 
323 
324 void REJMAP::initialise( //Redefine map
325  inT16 length) {
326  if (ptr != NULL)
327  free_struct (ptr, len * sizeof (REJ), "REJ");
328  len = length;
329  if (len > 0)
330  ptr = (REJ *) memset (alloc_struct (len * sizeof (REJ), "REJ"),
331  0, len * sizeof (REJ));
332  else
333  ptr = NULL;
334 }
335 
336 
337 inT16 REJMAP::accept_count() { //How many accepted?
338  int i;
339  inT16 count = 0;
340 
341  for (i = 0; i < len; i++) {
342  if (ptr[i].accepted ())
343  count++;
344  }
345  return count;
346 }
347 
348 
349 BOOL8 REJMAP::recoverable_rejects() { //Any non perm rejs?
350  int i;
351 
352  for (i = 0; i < len; i++) {
353  if (ptr[i].recoverable ())
354  return TRUE;
355  }
356  return FALSE;
357 }
358 
359 
// Body of a REJMAP predicate: returns TRUE as soon as any element reports
// accept_if_good_quality(), and FALSE when none does.
// NOTE(review): the embedded listing numbers jump from 359 to 361, so the
// source line holding this function's signature is missing from this listing;
// the function name cannot be determined from what is shown - TODO restore the
// signature from the upstream file.
361  int i;
362 
363  for (i = 0; i < len; i++) {
364  if (ptr[i].accept_if_good_quality ())
365  return TRUE;
366  }
367  return FALSE;
368 }
369 
370 
371 void REJMAP::remove_pos( //Cut out an element
372  inT16 pos //element to remove
373  ) {
374  REJ *new_ptr; //new, smaller map
375  int i;
376 
377  ASSERT_HOST (pos >= 0);
378  ASSERT_HOST (pos < len);
379  ASSERT_HOST (len > 0);
380 
381  len--;
382  if (len > 0)
383  new_ptr = (REJ *) memset (alloc_struct (len * sizeof (REJ), "REJ"),
384  0, len * sizeof (REJ));
385  else
386  new_ptr = NULL;
387 
388  for (i = 0; i < pos; i++)
389  new_ptr[i] = ptr[i]; //copy pre pos
390 
391  for (; pos < len; pos++)
392  new_ptr[pos] = ptr[pos + 1]; //copy post pos
393 
394  //delete old map
395  free_struct (ptr, (len + 1) * sizeof (REJ), "REJ");
396  ptr = new_ptr;
397 }
398 
399 
400 void REJMAP::print(FILE *fp) {
401  int i;
402  char buff[512];
403 
404  for (i = 0; i < len; i++) {
405  buff[i] = ptr[i].display_char ();
406  }
407  buff[i] = '\0';
408  fprintf (fp, "\"%s\"", buff);
409 }
410 
411 
412 void REJMAP::full_print(FILE *fp) {
413  int i;
414 
415  for (i = 0; i < len; i++) {
416  ptr[i].full_print (fp);
417  fprintf (fp, "\n");
418  }
419 }
420 
421 
422 void REJMAP::rej_word_small_xht() { //Reject whole word
423  int i;
424 
425  for (i = 0; i < len; i++) {
426  ptr[i].setrej_small_xht ();
427  }
428 }
429 
430 
431 void REJMAP::rej_word_tess_failure() { //Reject whole word
432  int i;
433 
434  for (i = 0; i < len; i++) {
435  ptr[i].setrej_tess_failure ();
436  }
437 }
438 
439 
440 void REJMAP::rej_word_not_tess_accepted() { //Reject whole word
441  int i;
442 
443  for (i = 0; i < len; i++) {
444  if (ptr[i].accepted()) ptr[i].setrej_not_tess_accepted();
445  }
446 }
447 
448 
449 void REJMAP::rej_word_contains_blanks() { //Reject whole word
450  int i;
451 
452  for (i = 0; i < len; i++) {
453  if (ptr[i].accepted()) ptr[i].setrej_contains_blanks();
454  }
455 }
456 
457 
458 void REJMAP::rej_word_bad_permuter() { //Reject whole word
459  int i;
460 
461  for (i = 0; i < len; i++) {
462  if (ptr[i].accepted()) ptr[i].setrej_bad_permuter ();
463  }
464 }
465 
466 
467 void REJMAP::rej_word_xht_fixup() { //Reject whole word
468  int i;
469 
470  for (i = 0; i < len; i++) {
471  if (ptr[i].accepted()) ptr[i].setrej_xht_fixup();
472  }
473 }
474 
475 
476 void REJMAP::rej_word_no_alphanums() { //Reject whole word
477  int i;
478 
479  for (i = 0; i < len; i++) {
480  if (ptr[i].accepted()) ptr[i].setrej_no_alphanums();
481  }
482 }
483 
484 
485 void REJMAP::rej_word_mostly_rej() { //Reject whole word
486  int i;
487 
488  for (i = 0; i < len; i++) {
489  if (ptr[i].accepted()) ptr[i].setrej_mostly_rej();
490  }
491 }
492 
493 
494 void REJMAP::rej_word_bad_quality() { //Reject whole word
495  int i;
496 
497  for (i = 0; i < len; i++) {
498  if (ptr[i].accepted()) ptr[i].setrej_bad_quality();
499  }
500 }
501 
502 
503 void REJMAP::rej_word_doc_rej() { //Reject whole word
504  int i;
505 
506  for (i = 0; i < len; i++) {
507  if (ptr[i].accepted()) ptr[i].setrej_doc_rej();
508  }
509 }
510 
511 
512 void REJMAP::rej_word_block_rej() { //Reject whole word
513  int i;
514 
515  for (i = 0; i < len; i++) {
516  if (ptr[i].accepted()) ptr[i].setrej_block_rej();
517  }
518 }
519 
520 
521 void REJMAP::rej_word_row_rej() { //Reject whole word
522  int i;
523 
524  for (i = 0; i < len; i++) {
525  if (ptr[i].accepted()) ptr[i].setrej_row_rej();
526  }
527 }