Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
normalis.cpp
Go to the documentation of this file.
1 /**********************************************************************
2  * File: normalis.cpp (Formerly denorm.c)
3  * Description: Code for the DENORM class.
4  * Author: Ray Smith
5  * Created: Thu Apr 23 09:22:43 BST 1992
6  *
7  * (C) Copyright 1992, Hewlett-Packard Ltd.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 #include "mfcpch.h" // Precompiled header include must be first.
20 
21 #include "normalis.h"
22 
23 #include <stdlib.h>
24 
25 #include "allheaders.h"
26 #include "blobs.h"
27 #include "helpers.h"
28 #include "ocrblock.h"
29 #include "unicharset.h"
30 #include "werd.h"
31 
32 
34  Init();
35 }
36 
37 // TODO(rays) Abolish all non-default constructors.
38 DENORM::DENORM(float x, float scaling, ROW *src) {
39  Init();
40  x_origin_ = x; // just copy
41  y_origin_ = 0.0f;
42  x_scale_ = y_scale_ = scaling;
43  row_ = src;
44 }
45 
46 DENORM::DENORM(float x, // from same pieces
47  float scaling,
48  double line_m, // default line: y = mx + c
49  double line_c,
50  inT16 seg_count, // no of segments
51  DENORM_SEG *seg_pts, // actual segments
52  BOOL8 using_row, // as baseline
53  ROW *src) {
54  Init();
55  x_origin_ = x; // just copy
56  y_origin_ = line_c;
57  ASSERT_HOST(line_m == 0.0);
58  x_scale_ = y_scale_ = scaling;
59  SetSegments(seg_pts, seg_count);
60 }
61 
62 
63 DENORM::DENORM(const DENORM &src) {
64  num_segs_ = 0;
65  segs_ = NULL;
66  rotation_ = NULL;
67  *this = src;
68 }
69 
70 
72  Clear();
73  inverse_ = src.inverse_;
74  pix_ = src.pix_;
75  block_ = src.block_;
76  row_ = src.row_;
77  if (src.rotation_ == NULL)
78  rotation_ = NULL;
79  else
80  rotation_ = new FCOORD(*src.rotation_);
81  predecessor_ = src.predecessor_;
82  SetSegments(src.segs_, src.num_segs_);
83  x_origin_ = src.x_origin_;
84  y_origin_ = src.y_origin_;
85  x_scale_ = src.x_scale_;
86  y_scale_ = src.y_scale_;
87  final_xshift_ = src.final_xshift_;
88  final_yshift_ = src.final_yshift_;
89  return *this;
90 }
91 
93  Clear();
94 }
95 
96 // Setup for a baseline normalization. If there are segs, then they
97 // are used, otherwise, if there is a row, that is used, otherwise the
98 // bottom of the word_box is used for the baseline.
99 void DENORM::SetupBLNormalize(const BLOCK* block, const ROW* row,
100  float x_height, const TBOX& word_box,
101  int num_segs, const DENORM_SEG* segs) {
102  float scale = kBlnXHeight / x_height;
103  float x_origin = (word_box.left() + word_box.right()) / 2.0f;
104  float y_origin = 0.0f;
105  if (num_segs == 0 && row == NULL) {
106  y_origin = word_box.bottom();
107  }
108  SetupNormalization(block, row, NULL, NULL, segs, num_segs,
109  x_origin, y_origin, scale, scale,
110  0.0f, static_cast<float>(kBlnBaselineOffset));
111 }
112 
113 // Initializes the denorm for a transformation. For details see the large
114 // comment in normalis.h.
115 // Arguments:
116 // block: if not NULL, then this is the first transformation, and
117 // block->re_rotation() needs to be used after the Denorm
118 // transformation to get back to the image coords.
119 // row: if not NULL, then row->baseline(x) is added to the y_origin, unless
120 // segs is not NULL and num_segs > 0, in which case they are used.
121 // rotation: if not NULL, apply this rotation after translation to the
122 // origin and scaling. (Usually a classify rotation.)
123 // predecessor: if not NULL, then predecessor has been applied to the
124 // input space and needs to be undone to complete the inverse.
125 // segs: if not NULL and num_segs > 0, then the segs provide the y_origin
126 // and the y_scale at a given source x.
127 // num_segs: the number of segs.
128 // The above pointers are not owned by this DENORM and are assumed to live
129 // longer than this denorm, except rotation, which is deep copied on input.
130 //
131 // x_origin: The x origin which will be mapped to final_xshift in the result.
132 // y_origin: The y origin which will be mapped to final_yshift in the result.
133 // Added to result of row->baseline(x) if not NULL.
134 //
135 // x_scale: scale factor for the x-coordinate.
136 // y_scale: scale factor for the y-coordinate. Ignored if segs is given.
137 // Note that these scale factors apply to the same x and y system as the
138 // x-origin and y-origin apply, ie after any block rotation, but before
139 // the rotation argument is applied.
140 //
141 // final_xshift: The x component of the final translation.
142 // final_yshift: The y component of the final translation.
144  const ROW* row,
145  const FCOORD* rotation,
146  const DENORM* predecessor,
147  const DENORM_SEG* segs, int num_segs,
148  float x_origin, float y_origin,
149  float x_scale, float y_scale,
150  float final_xshift, float final_yshift) {
151  Clear();
152  block_ = block;
153  row_ = row;
154  if (rotation == NULL)
155  rotation_ = NULL;
156  else
157  rotation_ = new FCOORD(*rotation);
158  predecessor_ = predecessor;
159  SetSegments(segs, num_segs);
160  x_origin_ = x_origin;
161  y_origin_ = y_origin;
162  x_scale_ = x_scale;
163  y_scale_ = y_scale;
164  final_xshift_ = final_xshift;
165  final_yshift_ = final_yshift;
166 }
167 
168 // Transforms the given coords one step forward to normalized space, without
169 // using any block rotation or predecessor.
170 void DENORM::LocalNormTransform(const TPOINT& pt, TPOINT* transformed) const {
171  FCOORD src_pt(pt.x, pt.y);
172  FCOORD float_result;
173  LocalNormTransform(src_pt, &float_result);
174  transformed->x = IntCastRounded(float_result.x());
175  transformed->y = IntCastRounded(float_result.y());
176 }
177 void DENORM::LocalNormTransform(const FCOORD& pt, FCOORD* transformed) const {
178  FCOORD translated(pt.x() - x_origin_, pt.y() - YOriginAtOrigX(pt.x()));
179  translated.set_x(translated.x() * x_scale_);
180  translated.set_y(translated.y() * YScaleAtOrigX(pt.x()));
181  if (rotation_ != NULL)
182  translated.rotate(*rotation_);
183  transformed->set_x(translated.x() + final_xshift_);
184  transformed->set_y(translated.y() + final_yshift_);
185 }
186 
187 // Transforms the given coords forward to normalized space using the
188 // full transformation sequence defined by the block rotation, the
189 // predecessors, deepest first, and finally this.
190 void DENORM::NormTransform(const TPOINT& pt, TPOINT* transformed) const {
191  FCOORD src_pt(pt.x, pt.y);
192  FCOORD float_result;
193  NormTransform(src_pt, &float_result);
194  transformed->x = IntCastRounded(float_result.x());
195  transformed->y = IntCastRounded(float_result.y());
196 }
197 void DENORM::NormTransform(const FCOORD& pt, FCOORD* transformed) const {
198  FCOORD src_pt(pt);
199  if (predecessor_ != NULL) {
200  predecessor_->NormTransform(pt, &src_pt);
201  } else if (block_ != NULL) {
202  FCOORD fwd_rotation(block_->re_rotation().x(), -block_->re_rotation().y());
203  src_pt.rotate(fwd_rotation);
204  }
205  LocalNormTransform(src_pt, transformed);
206 }
207 
208 // Transforms the given coords one step back to source space, without
209 // using to any block rotation or predecessor.
210 void DENORM::LocalDenormTransform(const TPOINT& pt, TPOINT* original) const {
211  FCOORD src_pt(pt.x, pt.y);
212  FCOORD float_result;
213  LocalDenormTransform(src_pt, &float_result);
214  original->x = IntCastRounded(float_result.x());
215  original->y = IntCastRounded(float_result.y());
216 }
217 void DENORM::LocalDenormTransform(const FCOORD& pt, FCOORD* original) const {
218  FCOORD rotated(pt.x() - final_xshift_, pt.y() - final_yshift_);
219  if (rotation_ != NULL) {
220  FCOORD inverse_rotation(rotation_->x(), -rotation_->y());
221  rotated.rotate(inverse_rotation);
222  }
223  original->set_x(rotated.x() / x_scale_ + x_origin_);
224  float y_scale = y_scale_;
225  if (num_segs_ > 0)
226  y_scale = YScaleAtOrigX(original->x());
227  original->set_y(rotated.y() / y_scale + YOriginAtOrigX(original->x()));
228 }
229 
230 // Transforms the given coords all the way back to source image space using
231 // the full transformation sequence defined by this and its predecesors
232 // recursively, shallowest first, and finally any block re_rotation.
233 void DENORM::DenormTransform(const TPOINT& pt, TPOINT* original) const {
234  FCOORD src_pt(pt.x, pt.y);
235  FCOORD float_result;
236  DenormTransform(src_pt, &float_result);
237  original->x = IntCastRounded(float_result.x());
238  original->y = IntCastRounded(float_result.y());
239 }
240 void DENORM::DenormTransform(const FCOORD& pt, FCOORD* original) const {
241  LocalDenormTransform(pt, original);
242  if (predecessor_ != NULL) {
243  predecessor_->DenormTransform(*original, original);
244  } else if (block_ != NULL) {
245  original->rotate(block_->re_rotation());
246  }
247 }
248 
249 // Normalize a blob using blob transformations. Less accurate, but
250 // more accurately copies the old way.
251 void DENORM::LocalNormBlob(TBLOB* blob) const {
252  TBOX blob_box = blob->bounding_box();
253  float x_center = (blob_box.left() + blob_box.right()) / 2.0f;
254  ICOORD translation(-IntCastRounded(x_origin_),
255  -IntCastRounded(YOriginAtOrigX(x_center)));
256  blob->Move(translation);
257  // Note that the old way of scaling only allowed for a single
258  // scale factor.
259  float scale = YScaleAtOrigX(x_center);
260  if (scale != 1.0f)
261  blob->Scale(scale);
262  if (rotation_ != NULL)
263  blob->Rotate(*rotation_);
264  translation.set_x(IntCastRounded(final_xshift_));
265  translation.set_y(IntCastRounded(final_yshift_));
266  blob->Move(translation);
267 }
268 
269 // Fills in the x-height range accepted by the given unichar_id, given its
270 // bounding box in the usual baseline-normalized coordinates, with some
271 // initial crude x-height estimate (such as word size) and this denoting the
272 // transformation that was used. Returns false, and an empty range if the
273 // bottom is a mis-fit. Returns true and empty [0, 0] range if the bottom
274 // fits, but the top is impossible.
275 bool DENORM::XHeightRange(int unichar_id, const UNICHARSET& unicharset,
276  const TBOX& bbox,
277  inT16* min_xht, inT16* max_xht) const {
278  // Clip the top and bottom to the limit of normalized feature space.
279  int top = ClipToRange<int>(bbox.top(), 0, kBlnCellHeight - 1);
280  int bottom = ClipToRange<int>(bbox.bottom(), 0, kBlnCellHeight - 1);
281  // A tolerance of yscale corresponds to 1 pixel in the image.
282  double tolerance = y_scale();
283  int min_bottom, max_bottom, min_top, max_top;
284  unicharset.get_top_bottom(unichar_id, &min_bottom, &max_bottom,
285  &min_top, &max_top);
286  // Default returns indicate a mis-fit.
287  *min_xht = 0;
288  *max_xht = 0;
289  // Chars with a misfitting bottom might be sub/superscript/dropcap, or might
290  // just be wrongly classified. Return an empty range so they have to be
291  // good to be considered.
292  if (bottom < min_bottom - tolerance || bottom > max_bottom + tolerance) {
293  return false;
294  }
295  // To help very high cap/xheight ratio fonts accept the correct x-height,
296  // and to allow the large caps in small caps to accept the xheight of the
297  // small caps, add kBlnBaselineOffset to chars with a maximum max.
298  if (max_top == kBlnCellHeight - 1)
299  max_top += kBlnBaselineOffset;
300  int height = top - kBlnBaselineOffset;
301  double min_height = min_top - kBlnBaselineOffset - tolerance;
302  double max_height = max_top - kBlnBaselineOffset + tolerance;
303  if (min_height <= 0.0) {
304  if (height <= 0 || max_height > 0)
305  *max_xht = MAX_INT16; // Anything will do.
306  } else if (height > 0) {
307  int result = IntCastRounded(height * kBlnXHeight / y_scale() / min_height);
308  *max_xht = static_cast<inT16>(ClipToRange(result, 0, MAX_INT16));
309  }
310  if (max_height > 0.0 && height > 0) {
311  int result = IntCastRounded(height * kBlnXHeight / y_scale() / max_height);
312  *min_xht = static_cast<inT16>(ClipToRange(result, 0, MAX_INT16));
313  }
314  return true;
315 }
316 
317 // ============== Private Code ======================
318 
319 // Free allocated memory and clear pointers.
320 void DENORM::Clear() {
321  if (segs_ != NULL) {
322  delete [] segs_;
323  segs_ = NULL;
324  num_segs_ = 0;
325  }
326  if (rotation_ != NULL) {
327  delete rotation_;
328  rotation_ = NULL;
329  }
330 }
331 
332 // Setup default values.
333 void DENORM::Init() {
334  inverse_ = false;
335  pix_ = NULL;
336  block_ = NULL;
337  row_ = NULL;
338  rotation_ = NULL;
339  predecessor_ = NULL;
340  segs_ = NULL;
341  num_segs_ = 0;
342  x_origin_ = 0.0f;
343  y_origin_ = 0.0f;
344  x_scale_ = 1.0f;
345  y_scale_ = 1.0f;
346  final_xshift_ = 0.0f;
347  final_yshift_ = static_cast<float>(kBlnBaselineOffset);
348 }
349 
350 // Returns the y-origin at the original (un-normalized) x.
351 float DENORM::YOriginAtOrigX(float orig_x) const {
352  if (num_segs_ > 0) {
353  const DENORM_SEG* seg = BinarySearchSegment(orig_x);
354  if (seg->ycoord != -MAX_INT32) {
355  return seg->ycoord;
356  }
357  }
358  if (row_ != NULL)
359  return row_->base_line(orig_x) + y_origin_;
360  else
361  return y_origin_;
362 }
363 
364 // Returns the y-scale at the original (un-normalized) x.
365 float DENORM::YScaleAtOrigX(float orig_x) const {
366  if (num_segs_ > 0) {
367  const DENORM_SEG* seg = BinarySearchSegment(orig_x);
368  if (seg->scale_factor > 0.0)
369  return seg->scale_factor;
370  }
371  return y_scale_;
372 }
373 
374 
375 // Compare two segments by xstart for use with qsort(3) and bsearch(3)
376 static int CompareSegByXStart(const DENORM_SEG* a, const DENORM_SEG* b) {
377  if (a->xstart < b->xstart)
378  return -1;
379  else if (a->xstart > b->xstart)
380  return 1;
381  return 0;
382 }
383 
384 // Deep copy the array of segments for use as a y_origin and y_scale.
385 void DENORM::SetSegments(const DENORM_SEG* new_segs, int seg_count) {
386  if (segs_ != NULL)
387  delete [] segs_;
388  if (seg_count > 0) {
389  segs_ = new DENORM_SEG[seg_count];
390  memcpy(segs_, new_segs, seg_count * sizeof(new_segs[0]));
391  // It is possible, if infrequent that the segments may be out of order.
392  // since we are searching with a binary search, keep them in order.
393  qsort(segs_, num_segs_, sizeof(segs_[0]),
394  reinterpret_cast<int(*)(const void*, const void*)>(
395  &CompareSegByXStart));
396  } else {
397  num_segs_ = 0;
398  segs_ = NULL;
399  }
400 }
401 
402 // Finds the appropriate segment for a given original x-coord
403 const DENORM_SEG* DENORM::BinarySearchSegment(float orig_x) const {
404  int bottom, top, middle; // binary search
405  bottom = 0;
406  top = num_segs_;
407  do {
408  middle = (bottom + top) / 2;
409  if (segs_[middle].xstart > orig_x)
410  top = middle;
411  else
412  bottom = middle;
413  }
414  while (top - bottom > 1);
415  return &segs_[bottom];
416 }