Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
reject.h File Reference
#include "params.h"
#include "pageres.h"
#include "notdll.h"

Go to the source code of this file.

Functions

void reject_blanks (WERD_RES *word)
void reject_poor_matches (WERD_RES *word, BLOB_CHOICE_LIST_CLIST *blob_choices)
float compute_reject_threshold (BLOB_CHOICE_LIST_CLIST *blob_choices)
BOOL8 word_contains_non_1_digit (const char *word, const char *word_lengths)
void dont_allow_1Il (WERD_RES *word)
void flip_hyphens (WERD_RES *word)
void flip_0O (WERD_RES *word)
BOOL8 non_0_digit (const char *str, int length)

Function Documentation

float compute_reject_threshold ( BLOB_CHOICE_LIST_CLIST *  blob_choices)

Definition at line 370 of file reject.cpp.

{
inT16 index; //to ratings
inT16 blob_count; //no of blobs in word
inT16 ok_blob_count = 0; //non TESS rej blobs in word
float *ratings; //array of confidences
float threshold; //rejection threshold
float bestgap; //biggest gap
float gapstart; //bottom of gap
//super iterator
BLOB_CHOICE_LIST_C_IT list_it = blob_choices;
BLOB_CHOICE_IT choice_it; //real iterator
blob_count = blob_choices->length ();
ratings = (float *) alloc_mem (blob_count * sizeof (float));
for (list_it.mark_cycle_pt (), index = 0;
!list_it.cycled_list (); list_it.forward (), index++) {
choice_it.set_to_list (list_it.data ());
if (choice_it.length () > 0) {
ratings[ok_blob_count] = choice_it.data ()->certainty ();
//get in an array
// tprintf("Rating[%d]=%c %g %g\n",
// index,choice_it.data()->char_class(),
// choice_it.data()->rating(),choice_it.data()->certainty());
ok_blob_count++;
}
}
ASSERT_HOST (index == blob_count);
qsort (ratings, ok_blob_count, sizeof (float), sort_floats);
//sort them
bestgap = 0;
gapstart = ratings[0] - 1; //all reject if none better
if (ok_blob_count >= 3) {
for (index = 0; index < ok_blob_count - 1; index++) {
if (ratings[index + 1] - ratings[index] > bestgap) {
bestgap = ratings[index + 1] - ratings[index];
//find biggest
gapstart = ratings[index];
}
}
}
threshold = gapstart + bestgap / 2;
// tprintf("First=%g, last=%g, gap=%g, threshold=%g\n",
// ratings[0],ratings[index],bestgap,threshold);
free_mem(ratings);
return threshold;
}
void dont_allow_1Il ( WERD_RES word)
void flip_0O ( WERD_RES word)
void flip_hyphens ( WERD_RES word)
BOOL8 non_0_digit ( const char *  str,
int  length 
)
void reject_blanks ( WERD_RES word)

Definition at line 290 of file reject.cpp.

{
inT16 i;
inT16 offset;
for (i = 0, offset = 0; word->best_choice->unichar_string()[offset] != '\0';
offset += word->best_choice->unichar_lengths()[i], i += 1) {
if (word->best_choice->unichar_string()[offset] == ' ')
//rej unrecognised blobs
word->reject_map[i].setrej_tess_failure ();
}
}
void reject_poor_matches ( WERD_RES word,
BLOB_CHOICE_LIST_CLIST *  blob_choices 
)

Definition at line 319 of file reject.cpp.

{
float threshold;
inT16 i = 0;
inT16 offset = 0;
//super iterator
BLOB_CHOICE_LIST_C_IT list_it = blob_choices;
BLOB_CHOICE_IT choice_it; //real iterator
#ifndef SECURE_NAMES
if (strlen(word->best_choice->unichar_lengths().string()) !=
list_it.length()) {
("ASSERT FAIL string:\"%s\"; strlen=%d; choices len=%d; blob len=%d\n",
strlen (word->best_choice->unichar_lengths().string()), list_it.length(),
word->box_word->length());
}
#endif
ASSERT_HOST (strlen (word->best_choice->unichar_lengths().string ()) ==
list_it.length ());
ASSERT_HOST(word->box_word->length() == list_it.length());
threshold = compute_reject_threshold (blob_choices);
for (list_it.mark_cycle_pt ();
!list_it.cycled_list (); list_it.forward (), i++,
offset += word->best_choice->unichar_lengths()[i]) {
/* NB - only compares the threshold against the TOP choice char in the
choices list for a blob !! - the selected one may be below the threshold
*/
choice_it.set_to_list (list_it.data ());
if ((word->best_choice->unichar_string()[offset] == ' ') ||
(choice_it.length () == 0))
//rej unrecognised blobs
word->reject_map[i].setrej_tess_failure ();
else if (choice_it.data ()->certainty () < threshold)
//rej poor score blob
word->reject_map[i].setrej_poor_match ();
}
}
BOOL8 word_contains_non_1_digit ( const char *  word,
const char *  word_lengths 
)