tesseract  3.04.00
tess_lang_mod_edge.cpp
Go to the documentation of this file.
1 /**********************************************************************
2  * File: tess_lang_mod_edge.cpp
3  * Description: Implementation of the Tesseract Language Model Edge Class
4  * Author: Ahmad Abdulkader
5  * Created: 2008
6  *
7  * (C) Copyright 2008, Google Inc.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 
20 #include "tess_lang_mod_edge.h"
21 #include "const.h"
22 #include "unichar.h"
23 
24 
25 
26 namespace tesseract {
27 // OOD constructor (OOD presumably stands for out-of-dictionary): builds an
   // edge that is not attached to any dawg -- dawg_ is NULL and both edge
   // references are 0.
   // NOTE(review): the signature line (listing line 28) is not present in this
   // extract. The body reads `cntxt` and `class_id`, so those are presumably
   // the parameters -- confirm against tess_lang_mod_edge.h.
   // cntxt is dereferenced below without a null check, so it must be non-null.
29  root_ = false;
30  cntxt_ = cntxt;
31  dawg_ = NULL;  // no dawg pointer is stored for this kind of edge
32  start_edge_ = 0;
33  end_edge_ = 0;
34  edge_mask_ = 0;
35  class_id_ = class_id;
   // Cache the string that the context's character set returns for class_id.
36  str_ = cntxt_->CharacterSet()->ClassString(class_id);
   // Seed the path cost with this edge's own Cost().
37  path_cost_ = Cost();
38 }
39 
40 // Constructor for leading/trailing punctuation edges and single-byte UTF
   // characters. The edge covers exactly one dawg edge: start_edge_ and
   // end_edge_ are both set to edge_idx.
   // NOTE(review): the first signature line (listing line 41) is missing from
   // this extract; the four-argument declaration shown in this listing's
   // cross-reference gives the first parameter as `CubeRecoContext *cntxt`.
   // cntxt is dereferenced below without a null check, so it must be non-null.
42  const Dawg *dawg, EDGE_REF edge_idx, int class_id) {
43  root_ = false;
44  cntxt_ = cntxt;
45  dawg_ = dawg;
46  start_edge_ = edge_idx;  // single edge: start and end coincide
47  end_edge_ = edge_idx;
48  edge_mask_ = 0;
49  class_id_ = class_id;
   // Cache the string that the context's character set returns for class_id.
50  str_ = cntxt_->CharacterSet()->ClassString(class_id);
   // Seed the path cost with this edge's own Cost().
51  path_cost_ = Cost();
52 }
53 
54 // Dictionary constructor for multi-byte UTF characters: the edge records
   // a separate start and end dawg edge (start_edge_idx / end_edge_idx).
   // NOTE(review): the first signature line (listing line 55) is missing from
   // this extract. The body reads `cntxt` and `dawg`, so those are presumably
   // the leading parameters -- confirm against tess_lang_mod_edge.h.
   // cntxt is dereferenced below without a null check, so it must be non-null.
56  EDGE_REF start_edge_idx, EDGE_REF end_edge_idx,
57  int class_id) {
58  root_ = false;
59  cntxt_ = cntxt;
60  dawg_ = dawg;
61  start_edge_ = start_edge_idx;
62  end_edge_ = end_edge_idx;
63  edge_mask_ = 0;
64  class_id_ = class_id;
   // Cache the string that the context's character set returns for class_id.
65  str_ = cntxt_->CharacterSet()->ClassString(class_id);
   // Seed the path cost with this edge's own Cost().
66  path_cost_ = Cost();
67 }
68 
// Builds a printable description of this edge and returns it in a freshly
// allocated 256-byte buffer. The text has the form
//   <dawg>(<start edge>[-LP][-TP])[-EOW-], Wtd Dawg Cost=<path cost>
// where <dawg> is one of OOD, NUM, Main, User, Doc or N/A.
// The buffer comes from new[] and is not released here, so the caller
// presumably has to delete[] it -- TODO confirm.
// NOTE(review): the signature line (listing line 69) is not present in this
// extract, so the function's name and qualifiers cannot be confirmed here.
70  char *char_ptr = new char[256];
   // NOTE(review): a plain new-expression reports failure by throwing
   // std::bad_alloc rather than returning null, so this check is only
   // reachable if the build replaces operator new or disables exceptions.
71  if (!char_ptr) {
72  return NULL;
73  }
74 
75  char dawg_str[256];
76  char edge_str[32];
   // DAWG_OOD and DAWG_NUMBER are compared as pointer values and are tested
   // before dawg_ is dereferenced; any other value is treated as a real Dawg.
   // NOTE(review): the OOD constructor stores NULL in dawg_. If DAWG_OOD does
   // not compare equal to NULL, such an edge would reach dawg_->permuter()
   // with a null pointer -- confirm the macro's value.
77  if (dawg_ == (Dawg *)DAWG_OOD) {
78  strcpy(dawg_str, "OOD");
79  } else if (dawg_ == (Dawg *)DAWG_NUMBER) {
80  strcpy(dawg_str, "NUM");
81  } else if (dawg_->permuter() == SYSTEM_DAWG_PERM) {
82  strcpy(dawg_str, "Main");
83  } else if (dawg_->permuter() == USER_DAWG_PERM) {
84  strcpy(dawg_str, "User");
85  } else if (dawg_->permuter() == DOC_DAWG_PERM) {
86  strcpy(dawg_str, "Doc");
87  } else {
88  strcpy(dawg_str, "N/A");
89  }
90 
   // NOTE(review): the constructors assign start_edge_ from EDGE_REF values,
   // which this listing shows as inT64; the cast to int presumably drops the
   // upper bits of references that do not fit -- confirm this is acceptable.
   // Assuming a 32-bit int, the number takes at most 11 characters, so both
   // suffixes below still fit in the 32-byte edge_str.
91  sprintf(edge_str, "%d", static_cast<int>(start_edge_));
92  if (IsLeadingPuncEdge(edge_mask_)) {
93  strcat(edge_str, "-LP");
94  }
95  if (IsTrailingPuncEdge(edge_mask_)) {
96  strcat(edge_str, "-TP");
97  }
   // path_cost_ is printed with %d, so it is assumed to be an int.
98  sprintf(char_ptr, "%s(%s)%s, Wtd Dawg Cost=%d",
99  dawg_str, edge_str, IsEOW() ? "-EOW-" : "", path_cost_);
100 
101  return char_ptr;
102 }
103 
105  const Dawg *dawg,
106  NODE_REF parent_node,
107  LangModEdge **edge_array) {
   // Creates one TessLangModEdge for every child of parent_node in dawg that
   // has a valid unichar id, stores the pointers in edge_array starting at
   // index 0, and returns how many were stored.
   // NOTE(review): the first signature line (listing line 104) is missing
   // from this extract; the declaration shown in this listing's
   // cross-reference is a static function returning int whose first
   // parameter is `CubeRecoContext *cntxt`.
   // edge_array is written without any bounds check, so the caller must
   // provide room for every child of parent_node. The edges are allocated
   // with new and not freed here; ownership presumably passes to the caller
   // -- TODO confirm.
108  int edge_cnt = 0;
109  NodeChildVector vec;
   // The third argument is the word_end flag of unichar_ids_of; false is
   // passed here.
110  dawg->unichar_ids_of(parent_node, &vec, false); // find all children
111  for (int i = 0; i < vec.size(); ++i) {
112  const NodeChild &child = vec[i];
   // Children without a valid unichar id produce no edge.
113  if (child.unichar_id == INVALID_UNICHAR_ID) continue;
114  edge_array[edge_cnt] =
115  new TessLangModEdge(cntxt, dawg, child.edge_ref, child.unichar_id);
   // NOTE(review): a plain new-expression throws on failure instead of
   // returning null, so this test is presumably always true.
116  if (edge_array[edge_cnt] != NULL) edge_cnt++;
117  }
118  return edge_cnt;
119 }
120 }
const char_32 * ClassString(int class_id) const
Definition: char_set.h:104
UNICHAR_ID unichar_id
Definition: dawg.h:61
int size() const
Definition: genericvector.h:72
EDGE_REF edge_ref
Definition: dawg.h:62
PermuterType permuter() const
Definition: dawg.h:129
TessLangModEdge(CubeRecoContext *cntxt, const Dawg *edge_array, EDGE_REF edge, int class_id)
#define NULL
Definition: host.h:144
#define DAWG_OOD
inT64 EDGE_REF
Definition: dawg.h:54
#define DAWG_NUMBER
#define IsLeadingPuncEdge(edge_mask)
static int CreateChildren(CubeRecoContext *cntxt, const Dawg *edges, NODE_REF edge_reg, LangModEdge **lm_edges)
CharSet * CharacterSet() const
#define IsTrailingPuncEdge(edge_mask)
inT64 NODE_REF
Definition: dawg.h:55
virtual void unichar_ids_of(NODE_REF node, NodeChildVector *vec, bool word_end) const =0