Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
colpartitiongrid.cpp
Go to the documentation of this file.
1 
2 // File: colpartitionrid.h
3 // Description: Class collecting code that acts on a BBGrid of ColPartitions.
4 // Author: Ray Smith
5 // Created: Mon Oct 05 08:42:01 PDT 2009
6 //
7 // (C) Copyright 2009, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
19 
20 #include "colpartitiongrid.h"
21 #include "colpartitionset.h"
22 #include "imagefind.h"
23 
24 namespace tesseract {
25 
26 BOOL_VAR(textord_tabfind_show_color_fit, false, "Show stroke widths");
27 
28 // Max pad factor used to search the neighbourhood of a partition to smooth
29 // partition types.
30 const int kMaxPadFactor = 6;
31 // Max multiple of size (min(height, width)) for the distance of the nearest
32 // neighbour for the change of type to be used.
34 // Max RMS color noise to compare colors.
35 const int kMaxRMSColorNoise = 128;
36 // Minimum number of blobs in text to make a strong text partition.
37 const int kHorzStrongTextlineCount = 10;
38 // Maximum number of lines in a credible figure caption.
39 const int kMaxCaptionLines = 7;
40 // Min ratio between biggest and smallest gap to bound a caption.
41 const double kMinCaptionGapRatio = 2.0;
42 // Min ratio between biggest gap and mean line height to bound a caption.
43 const double kMinCaptionGapHeightRatio = 0.5;
44 // Min fraction of ColPartition height to be overlapping for margin purposes.
45 const double kMarginOverlapFraction = 0.25;
46 // Size ratio required to consider an unmerged overlapping partition to be big.
47 const double kBigPartSizeRatio = 1.75;
48 // Allowed proportional change in stroke width to match for smoothing.
49 const double kStrokeWidthFractionTolerance = 0.25;
50 // Allowed constant change in stroke width to match for smoothing.
51 const double kStrokeWidthConstantTolerance = 2.0;
52 // Fraction of gridsize to allow arbitrary overlap between partitions.
54 // Max vertical distance of neighbouring ColPartition as a multiple of
55 // partition height for it to be a partner.
56 // TODO(rays) fix the problem that causes a larger number to not work well.
57 // The value needs to be larger as sparse text blocks in a page that gets
58 // marked as single column will not find adjacent lines as partners, and
59 // will merge horizontally distant, but aligned lines. See rep.4B3 p5.
60 // The value needs to be small because double-spaced legal docs written
61 // in a single column, but justified courier have widely spaced lines
62 // that need to get merged before they partner-up with the lines above
63 // and below. See legal.3B5 p13/17. Neither of these should depend on
64 // the value of kMaxPartitionSpacing to be successful, and ColPartition
65 // merging needs attention to fix this problem.
66 const double kMaxPartitionSpacing = 1.75;
67 // Margin by which text has to beat image or vice-versa to make a firm
68 // decision in GridSmoothNeighbour.
69 const int kSmoothDecisionMargin = 4;
70 
72 }
74  const ICOORD& bleft, const ICOORD& tright)
75  : BBGrid<ColPartition, ColPartition_CLIST, ColPartition_C_IT>(gridsize,
76  bleft, tright) {
77 }
78 
80 }
81 
82 // Handles a click event in a display window.
83 void ColPartitionGrid::HandleClick(int x, int y) {
85  ColPartition_CLIST, ColPartition_C_IT>::HandleClick(x, y);
86  // Run a radial search for partitions that overlap.
87  ColPartitionGridSearch radsearch(this);
88  radsearch.SetUniqueMode(true);
89  radsearch.StartRadSearch(x, y, 1);
90  ColPartition* neighbour;
91  FCOORD click(x, y);
92  while ((neighbour = radsearch.NextRadSearch()) != NULL) {
93  TBOX nbox = neighbour->bounding_box();
94  if (nbox.contains(click)) {
95  tprintf("Block box:");
96  neighbour->bounding_box().print();
97  neighbour->Print();
98  }
99  }
100 }
101 
102 // Merges ColPartitions in the grid that look like they belong in the same
103 // textline.
104 // For all partitions in the grid, calls the box_cb permanent callback
105 // to compute the search box, seaches the box, and if a candidate is found,
106 // calls the confirm_cb to check any more rules. If the confirm_cb returns
107 // true, then the partitions are merged.
108 // Both callbacks are deleted before returning.
111  TessResultCallback2<bool, const ColPartition*,
112  const ColPartition*>* confirm_cb) {
113  // Iterate the ColPartitions in the grid.
114  ColPartitionGridSearch gsearch(this);
115  gsearch.StartFullSearch();
116  ColPartition* part;
117  while ((part = gsearch.NextFullSearch()) != NULL) {
118  if (MergePart(box_cb, confirm_cb, part))
119  gsearch.RepositionIterator();
120  }
121  delete box_cb;
122  delete confirm_cb;
123 }
124 
125 // For the given partition, calls the box_cb permanent callback
126 // to compute the search box, searches the box, and if a candidate is found,
127 // calls the confirm_cb to check any more rules. If the confirm_cb returns
128 // true, then the partitions are merged.
129 // Returns true if the partition is consumed by one or more merges.
132  TessResultCallback2<bool, const ColPartition*,
133  const ColPartition*>* confirm_cb,
134  ColPartition* part) {
135  if (part->IsUnMergeableType())
136  return false;
137  bool any_done = false;
138  // Repeatedly merge part while we find a best merge candidate that works.
139  bool merge_done = false;
140  do {
141  merge_done = false;
142  TBOX box = part->bounding_box();
143  bool debug = AlignedBlob::WithinTestRegion(2, box.left(), box.bottom());
144  if (debug) {
145  tprintf("Merge candidate:");
146  box.print();
147  }
148  // Set up a rectangle search bounded by the part.
149  if (!box_cb->Run(part, &box))
150  continue;
151  // Create a list of merge candidates.
152  ColPartition_CLIST merge_candidates;
153  FindMergeCandidates(part, box, debug, &merge_candidates);
154  // Find the best merge candidate based on minimal overlap increase.
155  int overlap_increase;
156  ColPartition* neighbour = BestMergeCandidate(part, &merge_candidates, debug,
157  confirm_cb,
158  &overlap_increase);
159  if (neighbour != NULL && overlap_increase <= 0) {
160  if (debug) {
161  tprintf("Merging:hoverlap=%d, voverlap=%d, OLI=%d\n",
162  part->HCoreOverlap(*neighbour), part->VCoreOverlap(*neighbour),
163  overlap_increase);
164  }
165  // Looks like a good candidate so merge it.
166  RemoveBBox(neighbour);
167  // We will modify the box of part, so remove it from the grid, merge
168  // it and then re-insert it into the grid.
169  RemoveBBox(part);
170  part->Absorb(neighbour, NULL);
171  InsertBBox(true, true, part);
172  merge_done = true;
173  any_done = true;
174  } else if (neighbour != NULL) {
175  if (debug) {
176  tprintf("Overlapped when merged with increase %d: ", overlap_increase);
177  neighbour->bounding_box().print();
178  }
179  } else if (debug) {
180  tprintf("No candidate neighbour returned\n");
181  }
182  } while (merge_done);
183  return any_done;
184 }
185 
186 // Returns true if the given part and merge candidate might believably
187 // be part of a single text line according to the default rules.
188 // In general we only want to merge partitions that look like they
189 // are on the same text line, ie their median limits overlap, but we have
190 // to make exceptions for diacritics and stray punctuation.
191 static bool OKMergeCandidate(const ColPartition* part,
192  const ColPartition* candidate,
193  bool debug) {
194  const TBOX& part_box = part->bounding_box();
195  if (candidate == part)
196  return false; // Ignore itself.
197  if (!part->TypesMatch(*candidate) || candidate->IsUnMergeableType())
198  return false; // Don't mix inappropriate types.
199 
200  const TBOX& c_box = candidate->bounding_box();
201  if (debug) {
202  tprintf("Examining merge candidate:");
203  c_box.print();
204  }
205  // Candidates must be within a reasonable distance.
206  if (candidate->IsVerticalType() || part->IsVerticalType()) {
207  int h_dist = -part->HCoreOverlap(*candidate);
208  if (h_dist >= MAX(part_box.width(), c_box.width()) / 2) {
209  if (debug)
210  tprintf("Too far away: h_dist = %d\n", h_dist);
211  return false;
212  }
213  } else {
214  // Coarse filter by vertical distance between partitions.
215  int v_dist = -part->VCoreOverlap(*candidate);
216  if (v_dist >= MAX(part_box.height(), c_box.height()) / 2) {
217  if (debug)
218  tprintf("Too far away: v_dist = %d\n", v_dist);
219  return false;
220  }
221  // Candidates must either overlap in median y,
222  // or part or candidate must be an acceptable diacritic.
223  if (!part->VSignificantCoreOverlap(*candidate) &&
224  !part->OKDiacriticMerge(*candidate, debug) &&
225  !candidate->OKDiacriticMerge(*part, debug)) {
226  if (debug)
227  tprintf("Candidate fails overlap and diacritic tests!\n");
228  return false;
229  }
230  }
231  return true;
232 }
233 
234 // Helper function to compute the increase in overlap of the parts list of
235 // Colpartitions with the combination of merge1 and merge2, compared to
236 // the overlap with them uncombined.
237 // An overlap is not counted if passes the OKMergeOverlap test with ok_overlap
238 // as the pixel overlap limit. merge1 and merge2 must both be non-NULL.
239 static int IncreaseInOverlap(const ColPartition* merge1,
240  const ColPartition* merge2,
241  int ok_overlap,
242  ColPartition_CLIST* parts) {
243  ASSERT_HOST(merge1 != NULL && merge2 != NULL);
244  int total_area = 0;
245  ColPartition_C_IT it(parts);
246  TBOX merged_box(merge1->bounding_box());
247  merged_box += merge2->bounding_box();
248  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
249  ColPartition* part = it.data();
250  if (part == merge1 || part == merge2)
251  continue;
252  TBOX part_box = part->bounding_box();
253  // Compute the overlap of the merged box with part.
254  int overlap_area = part_box.intersection(merged_box).area();
255  if (overlap_area > 0 && !part->OKMergeOverlap(*merge1, *merge2,
256  ok_overlap, false)) {
257  total_area += overlap_area;
258  // Subtract the overlap of merge1 and merge2 individually.
259  overlap_area = part_box.intersection(merge1->bounding_box()).area();
260  if (overlap_area > 0)
261  total_area -= overlap_area;
262  TBOX intersection_box = part_box.intersection(merge2->bounding_box());
263  overlap_area = intersection_box.area();
264  if (overlap_area > 0) {
265  total_area -= overlap_area;
266  // Add back the 3-way area.
267  intersection_box &= merge1->bounding_box(); // In-place intersection.
268  overlap_area = intersection_box.area();
269  if (overlap_area > 0)
270  total_area += overlap_area;
271  }
272  }
273  }
274  return total_area;
275 }
276 
277 // Helper function to test that each partition in candidates is either a
278 // good diacritic merge with part or an OK merge candidate with all others
279 // in the candidates list.
280 // ASCII Art Scenario:
281 // We sometimes get text such as "join-this" where the - is actually a long
282 // dash culled from a standard set of extra characters that don't match the
283 // font of the text. This makes its strokewidth not match and forms a broken
284 // set of 3 partitions for "join", "-" and "this" and the dash may slightly
285 // overlap BOTH words.
286 // ------- -------
287 // | ==== |
288 // ------- -------
289 // The standard merge rule: "you can merge 2 partitions as long as there is
290 // no increase in overlap elsewhere" fails miserably here. Merge any pair
291 // of partitions and the combined box overlaps more with the third than
292 // before. To allow the merge, we need to consider whether it is safe to
293 // merge everything, without merging separate text lines. For that we need
294 // everything to be an OKMergeCandidate (which is supposed to prevent
295 // separate text lines merging), but this is hard for diacritics to satisfy,
296 // so an alternative to being OKMergeCandidate with everything is to be an
297 // OKDiacriticMerge with part as the base character.
298 static bool TestCompatibleCandidates(const ColPartition& part, bool debug,
299  ColPartition_CLIST* candidates) {
300  ColPartition_C_IT it(candidates);
301  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
302  ColPartition* candidate = it.data();
303  if (!candidate->OKDiacriticMerge(part, false)) {
304  ColPartition_C_IT it2(it);
305  for (it2.mark_cycle_pt(); !it2.cycled_list(); it2.forward()) {
306  ColPartition* candidate2 = it2.data();
307  if (candidate2 != candidate &&
308  !OKMergeCandidate(candidate, candidate2, false)) {
309  if (debug) {
310  tprintf("NC overlap failed:Candidate:");
311  candidate2->bounding_box().print();
312  tprintf("fails to be a good merge with:");
313  candidate->bounding_box().print();
314  }
315  return false;
316  }
317  }
318  }
319  }
320  return true;
321 }
322 
323 // Finds all the ColPartitions in the grid that overlap with the given
324 // box and returns them SortByBoxLeft(ed) and uniqued in the given list.
325 // Any partition equal to not_this (may be NULL) is excluded.
327  const ColPartition* not_this,
328  ColPartition_CLIST* parts) {
329  ColPartitionGridSearch rsearch(this);
330  rsearch.StartRectSearch(box);
331  ColPartition* part;
332  while ((part = rsearch.NextRectSearch()) != NULL) {
333  if (part != not_this)
334  parts->add_sorted(SortByBoxLeft<ColPartition>, true, part);
335  }
336 }
337 
338 // Finds and returns the best candidate ColPartition to merge with part,
339 // selected from the candidates list, based on the minimum increase in
340 // pairwise overlap among all the partitions overlapped by the combined box.
341 // If overlap_increase is not NULL then it returns the increase in overlap
342 // that would result from the merge.
343 // confirm_cb is a permanent callback that (if non-null) will be used to
344 // confirm the validity of a proposed merge candidate before selecting it.
345 //
346 // ======HOW MERGING WORKS======
347 // The problem:
348 // We want to merge all the parts of a textline together, but avoid merging
349 // separate textlines. Diacritics, i dots, punctuation, and broken characters
350 // are examples of small bits that need merging with the main textline.
351 // Drop-caps and descenders in one line that touch ascenders in the one below
352 // are examples of cases where we don't want to merge.
353 //
354 // The solution:
355 // Merges that increase overlap among other partitions are generally bad.
356 // Those that don't increase overlap (much) and minimize the total area
357 // seem to be good.
358 //
359 // Ascii art example:
360 // The text:
361 // groggy descenders
362 // minimum ascenders
363 // The boxes: The === represents a small box near or overlapping the lower box.
364 // -----------------
365 // | |
366 // -----------------
367 // -===-------------
368 // | |
369 // -----------------
370 // In considering what to do with the small === box, we find the 2 larger
371 // boxes as neighbours and possible merge candidates, but merging with the
372 // upper box increases overlap with the lower box, whereas merging with the
373 // lower box does not increase overlap.
374 // If the small === box didn't overlap either to start with, total area
375 // would be minimized by merging with the nearer (lower) box.
376 //
377 // This is a simple example. In reality, we have to allow some increase
378 // in overlap, or tightly spaced text would end up in bits.
380  const ColPartition* part, ColPartition_CLIST* candidates, bool debug,
382  int* overlap_increase) {
383  if (overlap_increase != NULL)
384  *overlap_increase = 0;
385  if (candidates->empty())
386  return NULL;
387  int ok_overlap =
388  static_cast<int>(kTinyEnoughTextlineOverlapFraction * gridsize() + 0.5);
389  // The best neighbour to merge with is the one that causes least
390  // total pairwise overlap among all the neighbours.
391  // If more than one offers the same total overlap, choose the one
392  // with the least total area.
393  const TBOX& part_box = part->bounding_box();
394  ColPartition_C_IT it(candidates);
395  ColPartition* best_candidate = NULL;
396  // Find the total combined box of all candidates and the original.
397  TBOX full_box(part_box);
398  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
399  ColPartition* candidate = it.data();
400  full_box += candidate->bounding_box();
401  }
402  // Keep valid neighbours in a list.
403  ColPartition_CLIST neighbours;
404  // Now run a rect search of the merged box for overlapping neighbours, as
405  // we need anything that might be overlapped by the merged box.
406  FindOverlappingPartitions(full_box, part, &neighbours);
407  if (debug) {
408  tprintf("Finding best merge candidate from %d, %d neighbours for box:",
409  candidates->length(), neighbours.length());
410  part_box.print();
411  }
412  // If the best increase in overlap is positive, then we also check the
413  // worst non-candidate overlap. This catches the case of multiple good
414  // candidates that overlap each other when merged. If the worst
415  // non-candidate overlap is better than the best overlap, then return
416  // the worst non-candidate overlap instead.
417  ColPartition_CLIST non_candidate_neighbours;
418  non_candidate_neighbours.set_subtract(SortByBoxLeft<ColPartition>, true,
419  &neighbours, candidates);
420  int worst_nc_increase = 0;
421  int best_increase = MAX_INT32;
422  int best_area = 0;
423  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
424  ColPartition* candidate = it.data();
425  if (confirm_cb != NULL && !confirm_cb->Run(part, candidate)) {
426  if (debug) {
427  tprintf("Candidate not confirmed:");
428  candidate->bounding_box().print();
429  }
430  continue;
431  }
432  int increase = IncreaseInOverlap(part, candidate, ok_overlap, &neighbours);
433  const TBOX& cand_box = candidate->bounding_box();
434  if (best_candidate == NULL || increase < best_increase) {
435  best_candidate = candidate;
436  best_increase = increase;
437  best_area = cand_box.bounding_union(part_box).area() - cand_box.area();
438  if (debug) {
439  tprintf("New best merge candidate has increase %d, area %d, over box:",
440  increase, best_area);
441  full_box.print();
442  candidate->Print();
443  }
444  } else if (increase == best_increase) {
445  int area = cand_box.bounding_union(part_box).area() - cand_box.area();
446  if (area < best_area) {
447  best_area = area;
448  best_candidate = candidate;
449  }
450  }
451  increase = IncreaseInOverlap(part, candidate, ok_overlap,
452  &non_candidate_neighbours);
453  if (increase > worst_nc_increase)
454  worst_nc_increase = increase;
455  }
456  if (best_increase > 0) {
457  // If the worst non-candidate increase is less than the best increase
458  // including the candidates, then all the candidates can merge together
459  // and the increase in outside overlap would be less, so use that result,
460  // but only if each candidate is either a good diacritic merge with part,
461  // or an ok merge candidate with all the others.
462  // See TestCompatibleCandidates for more explanation and a picture.
463  if (worst_nc_increase < best_increase &&
464  TestCompatibleCandidates(*part, debug, candidates)) {
465  best_increase = worst_nc_increase;
466  }
467  }
468  if (overlap_increase != NULL)
469  *overlap_increase = best_increase;
470  return best_candidate;
471 }
472 
473 // Helper to remove the given box from the given partition, put it in its
474 // own partition, and add to the partition list.
475 static void RemoveBadBox(BLOBNBOX* box, ColPartition* part,
476  ColPartition_LIST* part_list) {
477  part->RemoveBox(box);
478  ColPartition::MakeBigPartition(box, part_list);
479 }
480 
481 
482 // Split partitions where it reduces overlap between their bounding boxes.
483 // ColPartitions are after all supposed to be a partitioning of the blobs
484 // AND of the space on the page!
485 // Blobs that cause overlaps get removed, put in individual partitions
486 // and added to the big_parts list. They are most likely characters on
487 // 2 textlines that touch, or something big like a dropcap.
489  ColPartition_LIST* big_parts) {
490  int ok_overlap =
491  static_cast<int>(kTinyEnoughTextlineOverlapFraction * gridsize() + 0.5);
492  // Iterate the ColPartitions in the grid.
493  ColPartitionGridSearch gsearch(this);
494  gsearch.StartFullSearch();
495  ColPartition* part;
496  while ((part = gsearch.NextFullSearch()) != NULL) {
497  // Set up a rectangle search bounded by the part.
498  const TBOX& box = part->bounding_box();
499  ColPartitionGridSearch rsearch(this);
500  rsearch.SetUniqueMode(true);
501  rsearch.StartRectSearch(box);
502  int unresolved_overlaps = 0;
503 
504  ColPartition* neighbour;
505  while ((neighbour = rsearch.NextRectSearch()) != NULL) {
506  if (neighbour == part)
507  continue;
508  const TBOX& neighbour_box = neighbour->bounding_box();
509  if (neighbour->OKMergeOverlap(*part, *part, ok_overlap, false) &&
510  part->OKMergeOverlap(*neighbour, *neighbour, ok_overlap, false))
511  continue; // The overlap is OK both ways.
512 
513  // If removal of the biggest box from either partition eliminates the
514  // overlap, and it is much bigger than the box left behind, then
515  // it is either a drop-cap, an inter-line join, or some junk that
516  // we don't want anyway, so put it in the big_parts list.
517  if (!part->IsSingleton()) {
518  BLOBNBOX* excluded = part->BiggestBox();
519  TBOX shrunken = part->BoundsWithoutBox(excluded);
520  if (!shrunken.overlap(neighbour_box) &&
521  excluded->bounding_box().height() >
522  kBigPartSizeRatio * shrunken.height()) {
523  // Removing the biggest box fixes the overlap, so do it!
524  gsearch.RemoveBBox();
525  RemoveBadBox(excluded, part, big_parts);
526  InsertBBox(true, true, part);
527  gsearch.RepositionIterator();
528  break;
529  }
530  } else if (box.contains(neighbour_box)) {
531  ++unresolved_overlaps;
532  continue; // No amount of splitting will fix it.
533  }
534  if (!neighbour->IsSingleton()) {
535  BLOBNBOX* excluded = neighbour->BiggestBox();
536  TBOX shrunken = neighbour->BoundsWithoutBox(excluded);
537  if (!shrunken.overlap(box) &&
538  excluded->bounding_box().height() >
539  kBigPartSizeRatio * shrunken.height()) {
540  // Removing the biggest box fixes the overlap, so do it!
541  rsearch.RemoveBBox();
542  RemoveBadBox(excluded, neighbour, big_parts);
543  InsertBBox(true, true, neighbour);
544  gsearch.RepositionIterator();
545  break;
546  }
547  }
548  int part_overlap_count = part->CountOverlappingBoxes(neighbour_box);
549  int neighbour_overlap_count = neighbour->CountOverlappingBoxes(box);
550  ColPartition* right_part = NULL;
551  if (neighbour_overlap_count <= part_overlap_count ||
552  part->IsSingleton()) {
553  // Try to split the neighbour to reduce overlap.
554  BLOBNBOX* split_blob = neighbour->OverlapSplitBlob(box);
555  if (split_blob != NULL) {
556  rsearch.RemoveBBox();
557  right_part = neighbour->SplitAtBlob(split_blob);
558  InsertBBox(true, true, neighbour);
559  ASSERT_HOST(right_part != NULL);
560  }
561  } else {
562  // Try to split part to reduce overlap.
563  BLOBNBOX* split_blob = part->OverlapSplitBlob(neighbour_box);
564  if (split_blob != NULL) {
565  gsearch.RemoveBBox();
566  right_part = part->SplitAtBlob(split_blob);
567  InsertBBox(true, true, part);
568  ASSERT_HOST(right_part != NULL);
569  }
570  }
571  if (right_part != NULL) {
572  InsertBBox(true, true, right_part);
573  gsearch.RepositionIterator();
574  rsearch.RepositionIterator();
575  break;
576  }
577  }
578  if (unresolved_overlaps > 2 && part->IsSingleton()) {
579  // This part is no good so just add to big_parts.
580  RemoveBBox(part);
581  ColPartition_IT big_it(big_parts);
582  part->set_block_owned(true);
583  big_it.add_to_end(part);
584  gsearch.RepositionIterator();
585  }
586  }
587 }
588 
589 // Filters partitions of source_type by looking at local neighbours.
590 // Where a majority of neighbours have a text type, the partitions are
591 // changed to text, where the neighbours have image type, they are changed
592 // to image, and partitions that have no definite neighbourhood type are
593 // left unchanged.
594 // im_box and rerotation are used to map blob coordinates onto the
595 // nontext_map, which is used to prevent the spread of text neighbourhoods
596 // into images.
597 // Returns true if anything was changed.
599  Pix* nontext_map,
600  const TBOX& im_box,
601  const FCOORD& rotation) {
602  // Iterate the ColPartitions in the grid.
603  ColPartitionGridSearch gsearch(this);
604  gsearch.StartFullSearch();
605  ColPartition* part;
606  bool any_changed = false;
607  while ((part = gsearch.NextFullSearch()) != NULL) {
608  if (part->flow() != source_type || BLOBNBOX::IsLineType(part->blob_type()))
609  continue;
610  const TBOX& box = part->bounding_box();
611  bool debug = AlignedBlob::WithinTestRegion(2, box.left(), box.bottom());
612  if (SmoothRegionType(nontext_map, im_box, rotation, debug, part))
613  any_changed = true;
614  }
615  return any_changed;
616 }
617 
618 // Compute the mean RGB of the light and dark pixels in each ColPartition
619 // and also the rms error in the linearity of color.
621  int scaled_factor,
622  const FCOORD& rerotation) {
623  if (scaled_color == NULL)
624  return;
625  Pix* color_map1 = NULL;
626  Pix* color_map2 = NULL;
627  Pix* rms_map = NULL;
629  int width = pixGetWidth(scaled_color);
630  int height = pixGetHeight(scaled_color);
631  color_map1 = pixCreate(width, height, 32);
632  color_map2 = pixCreate(width, height, 32);
633  rms_map = pixCreate(width, height, 8);
634  }
635  // Iterate the ColPartitions in the grid.
636  ColPartitionGridSearch gsearch(this);
637  gsearch.StartFullSearch();
638  ColPartition* part;
639  while ((part = gsearch.NextFullSearch()) != NULL) {
640  TBOX part_box = part->bounding_box();
641  part_box.rotate_large(rerotation);
642  ImageFind::ComputeRectangleColors(part_box, scaled_color,
643  scaled_factor,
644  color_map1, color_map2, rms_map,
645  part->color1(), part->color2());
646  }
647  if (color_map1 != NULL) {
648  pixWrite("swcolorinput.png", scaled_color, IFF_PNG);
649  pixWrite("swcolor1.png", color_map1, IFF_PNG);
650  pixWrite("swcolor2.png", color_map2, IFF_PNG);
651  pixWrite("swrms.png", rms_map, IFF_PNG);
652  pixDestroy(&color_map1);
653  pixDestroy(&color_map2);
654  pixDestroy(&rms_map);
655  }
656 }
657 
658 // Reflects the grid and its colpartitions in the y-axis, assuming that
659 // all blob boxes have already been done.
661  ColPartition_LIST parts;
662  ColPartition_IT part_it(&parts);
663  // Iterate the ColPartitions in the grid to extract them.
664  ColPartitionGridSearch gsearch(this);
665  gsearch.StartFullSearch();
666  ColPartition* part;
667  while ((part = gsearch.NextFullSearch()) != NULL) {
668  part_it.add_after_then_move(part);
669  }
670  ICOORD bot_left(-tright().x(), bleft().y());
671  ICOORD top_right(-bleft().x(), tright().y());
672  // Reinitializing the grid with reflected coords also clears all the
673  // pointers, so parts will now own the ColPartitions. (Briefly).
674  Init(gridsize(), bot_left, top_right);
675  for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
676  part = part_it.extract();
677  part->ReflectInYAxis();
678  InsertBBox(true, true, part);
679  }
680 }
681 
682 // Rotates the grid and its colpartitions by the given angle, assuming that
683 // all blob boxes have already been done.
684 void ColPartitionGrid::Deskew(const FCOORD& deskew) {
685  ColPartition_LIST parts;
686  ColPartition_IT part_it(&parts);
687  // Iterate the ColPartitions in the grid to extract them.
688  ColPartitionGridSearch gsearch(this);
689  gsearch.StartFullSearch();
690  ColPartition* part;
691  while ((part = gsearch.NextFullSearch()) != NULL) {
692  part_it.add_after_then_move(part);
693  }
694  // Rebuild the grid to the new size.
695  TBOX grid_box(bleft_, tright_);
696  grid_box.rotate_large(deskew);
697  Init(gridsize(), grid_box.botleft(), grid_box.topright());
698  // Reinitializing the grid with rotated coords also clears all the
699  // pointers, so parts will now own the ColPartitions. (Briefly).
700  for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
701  part = part_it.extract();
702  part->ComputeLimits();
703  InsertBBox(true, true, part);
704  }
705 }
706 
707 // Sets the left and right tabs of the partitions in the grid.
709  // Iterate the ColPartitions in the grid.
710  ColPartitionGridSearch gsearch(this);
711  gsearch.StartFullSearch();
712  ColPartition* part;
713  while ((part = gsearch.NextFullSearch()) != NULL) {
714  const TBOX& part_box = part->bounding_box();
715  TabVector* left_line = tabgrid->LeftTabForBox(part_box, true, false);
716  // If the overlapping line is not a left tab, try for non-overlapping.
717  if (left_line != NULL && !left_line->IsLeftTab())
718  left_line = tabgrid->LeftTabForBox(part_box, false, false);
719  if (left_line != NULL && left_line->IsLeftTab())
720  part->SetLeftTab(left_line);
721  TabVector* right_line = tabgrid->RightTabForBox(part_box, true, false);
722  if (right_line != NULL && !right_line->IsRightTab())
723  right_line = tabgrid->RightTabForBox(part_box, false, false);
724  if (right_line != NULL && right_line->IsRightTab())
725  part->SetRightTab(right_line);
726  part->SetColumnGoodness(tabgrid->WidthCB());
727  }
728 }
729 
730 // Makes the ColPartSets and puts them in the PartSetVector ready
731 // for finding column bounds. Returns false if no partitions were found.
733  ColPartition_LIST* part_lists = new ColPartition_LIST[gridheight()];
734  part_sets->reserve(gridheight());
735  // Iterate the ColPartitions in the grid to get parts onto lists for the
736  // y bottom of each.
737  ColPartitionGridSearch gsearch(this);
738  gsearch.StartFullSearch();
739  ColPartition* part;
740  bool any_parts_found = false;
741  while ((part = gsearch.NextFullSearch()) != NULL) {
742  BlobRegionType blob_type = part->blob_type();
743  if (blob_type != BRT_NOISE &&
744  (blob_type != BRT_UNKNOWN || !part->boxes()->singleton())) {
745  int grid_x, grid_y;
746  const TBOX& part_box = part->bounding_box();
747  GridCoords(part_box.left(), part_box.bottom(), &grid_x, &grid_y);
748  ColPartition_IT part_it(&part_lists[grid_y]);
749  part_it.add_to_end(part);
750  any_parts_found = true;
751  }
752  }
753  if (any_parts_found) {
754  for (int grid_y = 0; grid_y < gridheight(); ++grid_y) {
755  ColPartitionSet* line_set = NULL;
756  if (!part_lists[grid_y].empty()) {
757  line_set = new ColPartitionSet(&part_lists[grid_y]);
758  }
759  part_sets->push_back(line_set);
760  }
761  }
762  delete [] part_lists;
763  return any_parts_found;
764 }
765 
766 // Makes a single ColPartitionSet consisting of a single ColPartition that
767 // represents the total horizontal extent of the significant content on the
768 // page. Used for the single column setting in place of automatic detection.
769 // Returns NULL if the page is empty of significant content.
771  ColPartition* single_column_part = NULL;
772  // Iterate the ColPartitions in the grid to get parts onto lists for the
773  // y bottom of each.
774  ColPartitionGridSearch gsearch(this);
775  gsearch.StartFullSearch();
776  ColPartition* part;
777  while ((part = gsearch.NextFullSearch()) != NULL) {
778  BlobRegionType blob_type = part->blob_type();
779  if (blob_type != BRT_NOISE &&
780  (blob_type != BRT_UNKNOWN || !part->boxes()->singleton())) {
781  // Consider for single column.
782  BlobTextFlowType flow = part->flow();
783  if ((blob_type == BRT_TEXT &&
784  (flow == BTFT_STRONG_CHAIN || flow == BTFT_CHAIN ||
785  flow == BTFT_LEADER || flow == BTFT_TEXT_ON_IMAGE)) ||
786  blob_type == BRT_RECTIMAGE || blob_type == BRT_POLYIMAGE) {
787  if (single_column_part == NULL) {
788  single_column_part = part->ShallowCopy();
789  single_column_part->set_blob_type(BRT_TEXT);
790  // Copy the tabs from itself to properly setup the margins.
791  single_column_part->CopyLeftTab(*single_column_part, false);
792  single_column_part->CopyRightTab(*single_column_part, false);
793  } else {
794  if (part->left_key() < single_column_part->left_key())
795  single_column_part->CopyLeftTab(*part, false);
796  if (part->right_key() > single_column_part->right_key())
797  single_column_part->CopyRightTab(*part, false);
798  }
799  }
800  }
801  }
802  if (single_column_part != NULL) {
803  // Make a ColPartitionSet out of the single_column_part as a candidate
804  // for the single column case.
805  single_column_part->SetColumnGoodness(cb);
806  return new ColPartitionSet(single_column_part);
807  }
808  return NULL;
809 }
810 
811 // Mark the BLOBNBOXes in each partition as being owned by that partition.
813  // Iterate the ColPartitions in the grid.
814  ColPartitionGridSearch gsearch(this);
815  gsearch.StartFullSearch();
816  ColPartition* part;
817  while ((part = gsearch.NextFullSearch()) != NULL) {
818  part->ClaimBoxes();
819  }
820 }
821 
822 // Retypes all the blobs referenced by the partitions in the grid.
823 // Image blobs are found and returned in the im_blobs list, as they are not
824 // owned by the block.
825 void ColPartitionGrid::ReTypeBlobs(BLOBNBOX_LIST* im_blobs) {
826  BLOBNBOX_IT im_blob_it(im_blobs);
827  ColPartition_LIST dead_parts;
828  ColPartition_IT dead_part_it(&dead_parts);
829  // Iterate the ColPartitions in the grid.
830  ColPartitionGridSearch gsearch(this);
831  gsearch.StartFullSearch();
832  ColPartition* part;
833  while ((part = gsearch.NextFullSearch()) != NULL) {
834  BlobRegionType blob_type = part->blob_type();
835  BlobTextFlowType flow = part->flow();
836  if (blob_type == BRT_POLYIMAGE || blob_type == BRT_RECTIMAGE) {
837  BLOBNBOX_C_IT blob_it(part->boxes());
838  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
839  BLOBNBOX* blob = blob_it.data();
840  im_blob_it.add_after_then_move(blob);
841  }
842  } else if (blob_type != BRT_NOISE) {
843  // Make sure the blobs are marked with the correct type and flow.
844  BLOBNBOX_C_IT blob_it(part->boxes());
845  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
846  BLOBNBOX* blob = blob_it.data();
847  if (blob->region_type() == BRT_NOISE) {
848  // TODO(rays) Deprecated. Change this section to an assert to verify
849  // and then delete.
850  ASSERT_HOST(blob->cblob()->area() != 0);
851  blob->set_owner(NULL);
852  blob_it.extract();
853  } else {
854  blob->set_region_type(blob_type);
855  if (blob->flow() != BTFT_LEADER)
856  blob->set_flow(flow);
857  }
858  }
859  }
860  if (blob_type == BRT_NOISE || part->boxes()->empty()) {
861  BLOBNBOX_C_IT blob_it(part->boxes());
862  part->DisownBoxes();
863  dead_part_it.add_to_end(part);
864  gsearch.RemoveBBox();
865  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
866  BLOBNBOX* blob = blob_it.data();
867  if (blob->cblob()->area() == 0) {
868  // Any blob with zero area is a fake image blob and should be deleted.
869  delete blob->cblob();
870  delete blob;
871  }
872  }
873  }
874  }
875 }
876 
877 // The boxes within the partitions have changed (by deskew) so recompute
878 // the bounds of all the partitions and reinsert them into the grid.
880  const ICOORD& bleft,
881  const ICOORD& tright,
882  const ICOORD& vertical) {
883  ColPartition_LIST saved_parts;
884  ColPartition_IT part_it(&saved_parts);
885  // Iterate the ColPartitions in the grid to get parts onto a list.
886  ColPartitionGridSearch gsearch(this);
887  gsearch.StartFullSearch();
888  ColPartition* part;
889  while ((part = gsearch.NextFullSearch()) != NULL) {
890  part_it.add_to_end(part);
891  }
892  // Reinitialize grid to the new size.
893  Init(gridsize, bleft, tright);
894  // Recompute the bounds of the parts and put them back in the new grid.
895  for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
896  part = part_it.extract();
897  part->set_vertical(vertical);
898  part->ComputeLimits();
899  InsertBBox(true, true, part);
900  }
901 }
902 
903 // Improves the margins of the ColPartitions in the grid by calling
904 // FindPartitionMargins on each.
905 // best_columns, which may be NULL, is an array of pointers indicating the
906 // column set at each y-coordinate in the grid.
907 // best_columns is usually the best_columns_ member of ColumnFinder.
909  // Iterate the ColPartitions in the grid.
910  ColPartitionGridSearch gsearch(this);
911  gsearch.StartFullSearch();
912  ColPartition* part;
913  while ((part = gsearch.NextFullSearch()) != NULL) {
914  // Set up a rectangle search x-bounded by the column and y by the part.
915  ColPartitionSet* columns = best_columns != NULL
916  ? best_columns[gsearch.GridY()]
917  : NULL;
918  FindPartitionMargins(columns, part);
919  const TBOX& box = part->bounding_box();
920  if (AlignedBlob::WithinTestRegion(2, box.left(), box.bottom())) {
921  tprintf("Computed margins for part:");
922  part->Print();
923  }
924  }
925 }
926 
927 // Improves the margins of the ColPartitions in the list by calling
928 // FindPartitionMargins on each.
929 // best_columns, which may be NULL, is an array of pointers indicating the
930 // column set at each y-coordinate in the grid.
931 // best_columns is usually the best_columns_ member of ColumnFinder.
933  ColPartition_LIST* parts) {
934  ColPartition_IT part_it(parts);
935  for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) {
936  ColPartition* part = part_it.data();
937  ColPartitionSet* columns = NULL;
938  if (best_columns != NULL) {
939  TBOX part_box = part->bounding_box();
940  // Get the columns from the y grid coord.
941  int grid_x, grid_y;
942  GridCoords(part_box.left(), part_box.bottom(), &grid_x, &grid_y);
943  columns = best_columns[grid_y];
944  }
945  FindPartitionMargins(columns, part);
946  }
947 }
948 
949 // Deletes all the partitions in the grid after disowning all the blobs.
951  ColPartition_LIST dead_parts;
952  ColPartition_IT dead_it(&dead_parts);
953  ColPartitionGridSearch gsearch(this);
954  gsearch.StartFullSearch();
955  ColPartition* part;
956  while ((part = gsearch.NextFullSearch()) != NULL) {
957  part->DisownBoxes();
958  dead_it.add_to_end(part); // Parts will be deleted on return.
959  }
960  Clear();
961 }
962 
963 // Deletes all the partitions in the grid that are of type BRT_UNKNOWN and
964 // all the blobs in them.
966  ColPartitionGridSearch gsearch(this);
967  gsearch.StartFullSearch();
968  ColPartition* part;
969  while ((part = gsearch.NextFullSearch()) != NULL) {
970  if (part->blob_type() == BRT_UNKNOWN) {
971  gsearch.RemoveBBox();
972  // Once marked, the blobs will be swept up by DeleteUnownedNoise.
973  part->set_flow(BTFT_NONTEXT);
974  part->set_blob_type(BRT_NOISE);
975  part->SetBlobTypes();
976  part->DisownBoxes();
977  delete part;
978  }
979  }
980  block->DeleteUnownedNoise();
981 }
982 
983 // Finds and marks text partitions that represent figure captions.
985  // For each image region find its best candidate text caption region,
986  // if any and mark it as such.
987  ColPartitionGridSearch gsearch(this);
988  gsearch.StartFullSearch();
989  ColPartition* part;
990  while ((part = gsearch.NextFullSearch()) != NULL) {
991  if (part->IsImageType()) {
992  const TBOX& part_box = part->bounding_box();
993  bool debug = AlignedBlob::WithinTestRegion(2, part_box.left(),
994  part_box.bottom());
995  ColPartition* best_caption = NULL;
996  int best_dist = 0; // Distance to best_caption.
997  int best_upper = 0; // Direction of best_caption.
998  // Handle both lower and upper directions.
999  for (int upper = 0; upper < 2; ++upper) {
1000  ColPartition_C_IT partner_it(upper ? part->upper_partners()
1001  : part->lower_partners());
1002  // If there are no image partners, then this direction is ok.
1003  for (partner_it.mark_cycle_pt(); !partner_it.cycled_list();
1004  partner_it.forward()) {
1005  ColPartition* partner = partner_it.data();
1006  if (partner->IsImageType()) {
1007  break;
1008  }
1009  }
1010  if (!partner_it.cycled_list()) continue;
1011  // Find the nearest totally overlapping text partner.
1012  for (partner_it.mark_cycle_pt(); !partner_it.cycled_list();
1013  partner_it.forward()) {
1014  ColPartition* partner = partner_it.data();
1015  if (!partner->IsTextType()) continue;
1016  const TBOX& partner_box = partner->bounding_box();
1017  if (debug) {
1018  tprintf("Finding figure captions for image part:");
1019  part_box.print();
1020  tprintf("Considering partner:");
1021  partner_box.print();
1022  }
1023  if (partner_box.left() >= part_box.left() &&
1024  partner_box.right() <= part_box.right()) {
1025  int dist = partner_box.y_gap(part_box);
1026  if (best_caption == NULL || dist < best_dist) {
1027  best_dist = dist;
1028  best_caption = partner;
1029  best_upper = upper;
1030  }
1031  }
1032  }
1033  }
1034  if (best_caption != NULL) {
1035  if (debug) {
1036  tprintf("Best caption candidate:");
1037  best_caption->bounding_box().print();
1038  }
1039  // We have a candidate caption. Qualify it as being separable from
1040  // any body text. We are looking for either a small number of lines
1041  // or a big gap that indicates a separation from the body text.
1042  int line_count = 0;
1043  int biggest_gap = 0;
1044  int smallest_gap = MAX_INT16;
1045  int total_height = 0;
1046  int mean_height = 0;
1047  ColPartition* end_partner = NULL;
1048  ColPartition* next_partner = NULL;
1049  for (ColPartition* partner = best_caption; partner != NULL &&
1050  line_count <= kMaxCaptionLines;
1051  partner = next_partner) {
1052  if (!partner->IsTextType()) {
1053  end_partner = partner;
1054  break;
1055  }
1056  ++line_count;
1057  total_height += partner->bounding_box().height();
1058  next_partner = partner->SingletonPartner(best_upper);
1059  if (next_partner != NULL) {
1060  int gap = partner->bounding_box().y_gap(
1061  next_partner->bounding_box());
1062  if (gap > biggest_gap) {
1063  biggest_gap = gap;
1064  end_partner = next_partner;
1065  mean_height = total_height / line_count;
1066  } else if (gap < smallest_gap) {
1067  smallest_gap = gap;
1068  }
1069  // If the gap looks big compared to the text size and the smallest
1070  // gap seen so far, then we can stop.
1071  if (biggest_gap > mean_height * kMinCaptionGapHeightRatio &&
1072  biggest_gap > smallest_gap * kMinCaptionGapRatio)
1073  break;
1074  }
1075  }
1076  if (debug) {
1077  tprintf("Line count=%d, biggest gap %d, smallest%d, mean height %d\n",
1078  line_count, biggest_gap, smallest_gap, mean_height);
1079  if (end_partner != NULL) {
1080  tprintf("End partner:");
1081  end_partner->bounding_box().print();
1082  }
1083  }
1084  if (next_partner == NULL && line_count <= kMaxCaptionLines)
1085  end_partner = NULL; // No gap, but line count is small.
1086  if (line_count <= kMaxCaptionLines) {
1087  // This is a qualified caption. Mark the text as caption.
1088  for (ColPartition* partner = best_caption; partner != NULL &&
1089  partner != end_partner;
1090  partner = next_partner) {
1091  partner->set_type(PT_CAPTION_TEXT);
1092  partner->SetBlobTypes();
1093  if (debug) {
1094  tprintf("Set caption type for partition:");
1095  partner->bounding_box().print();
1096  }
1097  next_partner = partner->SingletonPartner(best_upper);
1098  }
1099  }
1100  }
1101  }
1102  }
1103 }
1104 
1107 
1108 // For every ColPartition in the grid, finds its upper and lower neighbours.
1110  ColPartitionGridSearch gsearch(this);
1111  gsearch.StartFullSearch();
1112  ColPartition* part;
1113  while ((part = gsearch.NextFullSearch()) != NULL) {
1114  if (part->IsVerticalType()) {
1115  FindVPartitionPartners(true, part);
1116  FindVPartitionPartners(false, part);
1117  } else {
1118  FindPartitionPartners(true, part);
1119  FindPartitionPartners(false, part);
1120  }
1121  }
1122 }
1123 
1124 // Finds the best partner in the given direction for the given partition.
1125 // Stores the result with AddPartner.
1127  if (part->type() == PT_NOISE)
1128  return; // Noise is not allowed to partner anything.
1129  const TBOX& box = part->bounding_box();
1130  int top = part->median_top();
1131  int bottom = part->median_bottom();
1132  int height = top - bottom;
1133  int mid_y = (bottom + top) / 2;
1134  ColPartitionGridSearch vsearch(this);
1135  // Search down for neighbour below
1136  vsearch.StartVerticalSearch(box.left(), box.right(), part->MidY());
1137  ColPartition* neighbour;
1138  ColPartition* best_neighbour = NULL;
1139  int best_dist = MAX_INT32;
1140  while ((neighbour = vsearch.NextVerticalSearch(!upper)) != NULL) {
1141  if (neighbour == part || neighbour->type() == PT_NOISE)
1142  continue; // Noise is not allowed to partner anything.
1143  int neighbour_bottom = neighbour->median_bottom();
1144  int neighbour_top = neighbour->median_top();
1145  int neighbour_y = (neighbour_bottom + neighbour_top) / 2;
1146  if (upper != (neighbour_y > mid_y))
1147  continue;
1148  if (!part->HOverlaps(*neighbour) && !part->WithinSameMargins(*neighbour))
1149  continue;
1150  if (!part->TypesMatch(*neighbour)) {
1151  if (best_neighbour == NULL)
1152  best_neighbour = neighbour;
1153  continue;
1154  }
1155  int dist = upper ? neighbour_bottom - top : bottom - neighbour_top;
1156  if (dist <= kMaxPartitionSpacing * height) {
1157  if (dist < best_dist) {
1158  best_dist = dist;
1159  best_neighbour = neighbour;
1160  }
1161  } else {
1162  break;
1163  }
1164  }
1165  if (best_neighbour != NULL)
1166  part->AddPartner(upper, best_neighbour);
1167 }
1168 
1169 // Finds the best partner in the given direction for the given partition.
1170 // Stores the result with AddPartner.
1172  ColPartition* part) {
1173  if (part->type() == PT_NOISE)
1174  return; // Noise is not allowed to partner anything.
1175  const TBOX& box = part->bounding_box();
1176  int left = part->median_left();
1177  int right = part->median_right();
1178  int width = right - left;
1179  int mid_x = (left + right) / 2;
1180  ColPartitionGridSearch hsearch(this);
1181  // Search left for neighbour to_the_left
1182  hsearch.StartSideSearch(mid_x, box.bottom(), box.top());
1183  ColPartition* neighbour;
1184  ColPartition* best_neighbour = NULL;
1185  int best_dist = MAX_INT32;
1186  while ((neighbour = hsearch.NextSideSearch(to_the_left)) != NULL) {
1187  if (neighbour == part || neighbour->type() == PT_NOISE)
1188  continue; // Noise is not allowed to partner anything.
1189  int neighbour_left = neighbour->median_left();
1190  int neighbour_right = neighbour->median_right();
1191  int neighbour_x = (neighbour_left + neighbour_right) / 2;
1192  if (to_the_left != (neighbour_x < mid_x))
1193  continue;
1194  if (!part->VOverlaps(*neighbour))
1195  continue;
1196  if (!part->TypesMatch(*neighbour))
1197  continue; // Only match to other vertical text.
1198  int dist = to_the_left ? left - neighbour_right : neighbour_left - right;
1199  if (dist <= kMaxPartitionSpacing * width) {
1200  if (dist < best_dist || best_neighbour == NULL) {
1201  best_dist = dist;
1202  best_neighbour = neighbour;
1203  }
1204  } else {
1205  break;
1206  }
1207  }
1208  // For vertical partitions, the upper partner is to the left, and lower is
1209  // to the right.
1210  if (best_neighbour != NULL)
1211  part->AddPartner(to_the_left, best_neighbour);
1212 }
1213 
1214 // For every ColPartition with multiple partners in the grid, reduces the
1215 // number of partners to 0 or 1. If get_desperate is true, goes to more
1216 // desperate merge methods to merge flowing text before breaking partnerships.
1218  ColPartitionGridSearch gsearch(this);
1219  // Refine in type order so that chasing multiple partners can be done
1220  // before eliminating type mis-matching partners.
1221  for (int type = PT_UNKNOWN + 1; type <= PT_COUNT; type++) {
1222  // Iterate the ColPartitions in the grid.
1223  gsearch.StartFullSearch();
1224  ColPartition* part;
1225  while ((part = gsearch.NextFullSearch()) != NULL) {
1226  part->RefinePartners(static_cast<PolyBlockType>(type),
1227  get_desperate, this);
1228  // Iterator may have been messed up by a merge.
1229  gsearch.RepositionIterator();
1230  }
1231  }
1232 }
1233 
1234 
1235 // ========================== PRIVATE CODE ========================
1236 
1237 // Finds and returns a list of candidate ColPartitions to merge with part.
1238 // The candidates must overlap search_box, and when merged must not
1239 // overlap any other partitions that are not overlapped by each individually.
1240 void ColPartitionGrid::FindMergeCandidates(const ColPartition* part,
1241  const TBOX& search_box, bool debug,
1242  ColPartition_CLIST* candidates) {
1243  int ok_overlap =
1244  static_cast<int>(kTinyEnoughTextlineOverlapFraction * gridsize() + 0.5);
1245  const TBOX& part_box = part->bounding_box();
1246  // Now run the rect search.
1247  ColPartitionGridSearch rsearch(this);
1248  rsearch.SetUniqueMode(true);
1249  rsearch.StartRectSearch(search_box);
1250  ColPartition* candidate;
1251  while ((candidate = rsearch.NextRectSearch()) != NULL) {
1252  if (!OKMergeCandidate(part, candidate, debug))
1253  continue;
1254  const TBOX& c_box = candidate->bounding_box();
1255  // Candidate seems to be a potential merge with part. If one contains
1256  // the other, then the merge is a no-brainer. Otherwise, search the
1257  // combined box to see if anything else is inappropriately overlapped.
1258  if (!part_box.contains(c_box) && !c_box.contains(part_box)) {
1259  // Search the combined rectangle to see if anything new is overlapped.
1260  // This is a preliminary test designed to quickly weed-out stupid
1261  // merge candidates that would create a big list of overlapped objects
1262  // for the squared-order overlap analysis. Eg. vertical and horizontal
1263  // line-like objects that overlap real text when merged:
1264  // || ==========================
1265  // ||
1266  // || r e a l t e x t
1267  // ||
1268  // ||
1269  TBOX merged_box(part_box);
1270  merged_box += c_box;
1271  ColPartitionGridSearch msearch(this);
1272  msearch.SetUniqueMode(true);
1273  msearch.StartRectSearch(merged_box);
1274  ColPartition* neighbour;
1275  while ((neighbour = msearch.NextRectSearch()) != NULL) {
1276  if (neighbour == part || neighbour == candidate)
1277  continue; // Ignore itself.
1278  if (neighbour->OKMergeOverlap(*part, *candidate, ok_overlap, false))
1279  continue; // This kind of merge overlap is OK.
1280  TBOX n_box = neighbour->bounding_box();
1281  // The overlap is OK if:
1282  // * the n_box already overlapped the part or the candidate OR
1283  // * the n_box is a suitable merge with either part or candidate
1284  if (!n_box.overlap(part_box) && !n_box.overlap(c_box) &&
1285  !OKMergeCandidate(part, neighbour, false) &&
1286  !OKMergeCandidate(candidate, neighbour, false))
1287  break;
1288  }
1289  if (neighbour != NULL) {
1290  if (debug) {
1291  tprintf("Combined box overlaps another that is not OK despite"
1292  " allowance of %d:", ok_overlap);
1293  neighbour->bounding_box().print();
1294  tprintf("Reason:");
1295  OKMergeCandidate(part, neighbour, true);
1296  tprintf("...and:");
1297  OKMergeCandidate(candidate, neighbour, true);
1298  tprintf("Overlap:");
1299  neighbour->OKMergeOverlap(*part, *candidate, ok_overlap, true);
1300  }
1301  continue;
1302  }
1303  }
1304  if (debug) {
1305  tprintf("Adding candidate:");
1306  candidate->bounding_box().print();
1307  }
1308  // Unique elements as they arrive.
1309  candidates->add_sorted(SortByBoxLeft<ColPartition>, true, candidate);
1310  }
1311 }
1312 
1313 // Smoothes the region type/flow type of the given part by looking at local
1314 // neigbours and the given image mask. Searches a padded rectangle with the
1315 // padding truncated on one size of the part's box in turn for each side,
1316 // using the result (if any) that has the least distance to all neighbours
1317 // that contribute to the decision. This biases in favor of rectangular
1318 // regions without completely enforcing them.
1319 // If a good decision cannot be reached, the part is left unchanged.
1320 // im_box and rerotation are used to map blob coordinates onto the
1321 // nontext_map, which is used to prevent the spread of text neighbourhoods
1322 // into images.
1323 // Returns true if the partition was changed.
1324 bool ColPartitionGrid::SmoothRegionType(Pix* nontext_map,
1325  const TBOX& im_box,
1326  const FCOORD& rerotation,
1327  bool debug,
1328  ColPartition* part) {
1329  const TBOX& part_box = part->bounding_box();
1330  if (debug) {
1331  tprintf("Smooothing part at:");
1332  part_box.print();
1333  }
1334  BlobRegionType best_type = BRT_UNKNOWN;
1335  int best_dist = MAX_INT32;
1336  int max_dist = MIN(part_box.width(), part_box.height());
1337  max_dist = MAX(max_dist * kMaxNeighbourDistFactor, gridsize() * 2);
1338  // Search with the pad truncated on each side of the box in turn.
1339  bool any_image = false;
1340  bool all_image = true;
1341  for (int d = 0; d < BND_COUNT; ++d) {
1342  int dist;
1343  BlobNeighbourDir dir = static_cast<BlobNeighbourDir>(d);
1344  BlobRegionType type = SmoothInOneDirection(dir, nontext_map, im_box,
1345  rerotation, debug, *part,
1346  &dist);
1347  if (debug) {
1348  tprintf("Result in dir %d = %d at dist %d\n", dir, type, dist);
1349  }
1350  if (type != BRT_UNKNOWN && dist < best_dist) {
1351  best_dist = dist;
1352  best_type = type;
1353  }
1354  if (type == BRT_POLYIMAGE)
1355  any_image = true;
1356  else
1357  all_image = false;
1358  }
1359  if (best_dist > max_dist)
1360  return false; // Too far away to set the type with it.
1361  if (part->flow() == BTFT_STRONG_CHAIN && !all_image) {
1362  return false; // We are not modifying it.
1363  }
1364  BlobRegionType new_type = part->blob_type();
1365  BlobTextFlowType new_flow = part->flow();
1366  if (best_type == BRT_TEXT && !any_image) {
1367  new_flow = BTFT_STRONG_CHAIN;
1368  new_type = BRT_TEXT;
1369  } else if (best_type == BRT_VERT_TEXT && !any_image) {
1370  new_flow = BTFT_STRONG_CHAIN;
1371  new_type = BRT_VERT_TEXT;
1372  } else if (best_type == BRT_POLYIMAGE) {
1373  new_flow = BTFT_NONTEXT;
1374  new_type = BRT_UNKNOWN;
1375  }
1376  if (new_type != part->blob_type() || new_flow != part->flow()) {
1377  part->set_flow(new_flow);
1378  part->set_blob_type(new_type);
1379  part->SetBlobTypes();
1380  if (debug) {
1381  tprintf("Modified part:");
1382  part->Print();
1383  }
1384  return true;
1385  } else {
1386  return false;
1387  }
1388 }
1389 
1390 // Sets up a search box based on the part_box, padded in all directions
1391 // except direction. Also setup dist_scaling to weight x,y distances according
1392 // to the given direction.
1393 static void ComputeSearchBoxAndScaling(BlobNeighbourDir direction,
1394  const TBOX& part_box,
1395  int min_padding,
1396  TBOX* search_box,
1397  ICOORD* dist_scaling) {
1398  *search_box = part_box;
1399  // Generate a pad value based on the min dimension of part_box, but at least
1400  // min_padding and then scaled by kMaxPadFactor.
1401  int padding = MIN(part_box.height(), part_box.width());
1402  padding = MAX(padding, min_padding);
1403  padding *= kMaxPadFactor;
1404  search_box->pad(padding, padding);
1405  // Truncate the box in the appropriate direction and make the distance
1406  // metric slightly biased in the truncated direction.
1407  switch (direction) {
1408  case BND_LEFT:
1409  search_box->set_left(part_box.left());
1410  *dist_scaling = ICOORD(2, 1);
1411  break;
1412  case BND_BELOW:
1413  search_box->set_bottom(part_box.bottom());
1414  *dist_scaling = ICOORD(1, 2);
1415  break;
1416  case BND_RIGHT:
1417  search_box->set_right(part_box.right());
1418  *dist_scaling = ICOORD(2, 1);
1419  break;
1420  case BND_ABOVE:
1421  search_box->set_top(part_box.top());
1422  *dist_scaling = ICOORD(1, 2);
1423  break;
1424  default:
1425  ASSERT_HOST(false);
1426  }
1427 }
1428 
1429 // Local enum used by SmoothInOneDirection and AccumulatePartDistances
1430 // for the different types of partition neighbour.
1432  NPT_HTEXT, // Definite horizontal text.
1433  NPT_VTEXT, // Definite vertical text.
1434  NPT_WEAK_HTEXT, // Weakly horizontal text. Counts as HTEXT for HTEXT, but
1435  // image for image and VTEXT.
1436  NPT_WEAK_VTEXT, // Weakly vertical text. Counts as VTEXT for VTEXT, but
1437  // image for image and HTEXT.
1438  NPT_IMAGE, // Defininte non-text.
1439  NPT_COUNT // Number of array elements.
1440 };
1441 
1442 // Executes the search for SmoothRegionType in a single direction.
1443 // Creates a bounding box that is padded in all directions except direction,
1444 // and searches it for other partitions. Finds the nearest collection of
1445 // partitions that makes a decisive result (if any) and returns the type
1446 // and the distance of the collection. If there are any pixels in the
1447 // nontext_map, then the decision is biased towards image.
1448 BlobRegionType ColPartitionGrid::SmoothInOneDirection(
1449  BlobNeighbourDir direction, Pix* nontext_map,
1450  const TBOX& im_box, const FCOORD& rerotation,
1451  bool debug, const ColPartition& part, int* best_distance) {
1452  // Set up a rectangle search bounded by the part.
1453  TBOX part_box = part.bounding_box();
1454  TBOX search_box;
1455  ICOORD dist_scaling;
1456  ComputeSearchBoxAndScaling(direction, part_box, gridsize(),
1457  &search_box, &dist_scaling);
1458  bool image_region = ImageFind::CountPixelsInRotatedBox(search_box, im_box,
1459  rerotation,
1460  nontext_map) > 0;
1462  AccumulatePartDistances(part, dist_scaling, search_box,
1463  nontext_map, im_box, rerotation, debug, dists);
1464  // By iteratively including the next smallest distance across the vectors,
1465  // (as in a merge sort) we can use the vector indices as counts of each type
1466  // and find the nearest set of objects that give us a definite decision.
1467  int counts[NPT_COUNT];
1468  memset(counts, 0, sizeof(counts[0]) * NPT_COUNT);
1469  // If there is image in the search box, tip the balance in image's favor.
1470  int image_bias = image_region ? kSmoothDecisionMargin / 2 : 0;
1471  BlobRegionType text_dir = part.blob_type();
1472  BlobTextFlowType flow_type = part.flow();
1473  int min_dist = 0;
1474  do {
1475  // Find the minimum new entry across the vectors
1476  min_dist = MAX_INT32;
1477  for (int i = 0; i < NPT_COUNT; ++i) {
1478  if (counts[i] < dists[i].size() && dists[i][counts[i]] < min_dist)
1479  min_dist = dists[i][counts[i]];
1480  }
1481  // Step all the indices/counts forward to include min_dist.
1482  for (int i = 0; i < NPT_COUNT; ++i) {
1483  while (counts[i] < dists[i].size() && dists[i][counts[i]] <= min_dist)
1484  ++counts[i];
1485  }
1486  *best_distance = min_dist;
1487  if (debug) {
1488  tprintf("Totals: htext=%d+%d, vtext=%d+%d, image=%d+%d, at dist=%d\n",
1489  counts[NPT_HTEXT], counts[NPT_WEAK_HTEXT],
1490  counts[NPT_VTEXT], counts[NPT_WEAK_VTEXT],
1491  counts[NPT_IMAGE], image_bias, min_dist);
1492  }
1493  // See if we have a decision yet.
1494  int image_count = counts[NPT_IMAGE];
1495  int htext_score = counts[NPT_HTEXT] + counts[NPT_WEAK_HTEXT] -
1496  (image_count + counts[NPT_WEAK_VTEXT]);
1497  int vtext_score = counts[NPT_VTEXT] + counts[NPT_WEAK_VTEXT] -
1498  (image_count + counts[NPT_WEAK_HTEXT]);
1499  if (image_count > 0 &&
1500  image_bias - htext_score >= kSmoothDecisionMargin &&
1501  image_bias - vtext_score >= kSmoothDecisionMargin) {
1502  *best_distance = dists[NPT_IMAGE][0];
1503  if (dists[NPT_WEAK_VTEXT].size() > 0 &&
1504  *best_distance > dists[NPT_WEAK_VTEXT][0])
1505  *best_distance = dists[NPT_WEAK_VTEXT][0];
1506  if (dists[NPT_WEAK_HTEXT].size() > 0 &&
1507  *best_distance > dists[NPT_WEAK_HTEXT][0])
1508  *best_distance = dists[NPT_WEAK_HTEXT][0];
1509  return BRT_POLYIMAGE;
1510  }
1511  if ((text_dir != BRT_VERT_TEXT || flow_type != BTFT_CHAIN) &&
1512  counts[NPT_HTEXT] > 0 && htext_score >= kSmoothDecisionMargin) {
1513  *best_distance = dists[NPT_HTEXT][0];
1514  return BRT_TEXT;
1515  } else if ((text_dir != BRT_TEXT || flow_type != BTFT_CHAIN) &&
1516  counts[NPT_VTEXT] > 0 && vtext_score >= kSmoothDecisionMargin) {
1517  *best_distance = dists[NPT_VTEXT][0];
1518  return BRT_VERT_TEXT;
1519  }
1520  } while (min_dist < MAX_INT32);
1521  return BRT_UNKNOWN;
1522 }
1523 
1524 // Counts the partitions in the given search_box by appending the gap
1525 // distance (scaled by dist_scaling) of the part from the base_part to the
1526 // vector of the appropriate type for the partition. Prior to return, the
1527 // vectors in the dists array are sorted in increasing order.
1528 // The nontext_map (+im_box, rerotation) is used to make text invisible if
1529 // there is non-text in between.
1530 // dists must be an array of GenericVectors of size NPT_COUNT.
1531 void ColPartitionGrid::AccumulatePartDistances(const ColPartition& base_part,
1532  const ICOORD& dist_scaling,
1533  const TBOX& search_box,
1534  Pix* nontext_map,
1535  const TBOX& im_box,
1536  const FCOORD& rerotation,
1537  bool debug,
1538  GenericVector<int>* dists) {
1539  const TBOX& part_box = base_part.bounding_box();
1540  ColPartitionGridSearch rsearch(this);
1541  rsearch.SetUniqueMode(true);
1542  rsearch.StartRectSearch(search_box);
1543  ColPartition* neighbour;
1544  // Search for compatible neighbours with a similar strokewidth, but not
1545  // on the other side of a tab vector.
1546  while ((neighbour = rsearch.NextRectSearch()) != NULL) {
1547  if (neighbour->IsUnMergeableType() ||
1548  !base_part.ConfirmNoTabViolation(*neighbour) ||
1549  neighbour == &base_part)
1550  continue;
1551  TBOX nbox = neighbour->bounding_box();
1552  BlobRegionType n_type = neighbour->blob_type();
1553  if ((n_type == BRT_TEXT || n_type == BRT_VERT_TEXT) &&
1554  !ImageFind::BlankImageInBetween(part_box, nbox, im_box, rerotation,
1555  nontext_map))
1556  continue; // Text not visible the other side of image.
1557  if (BLOBNBOX::IsLineType(n_type))
1558  continue; // Don't use horizontal lines as neighbours.
1559  int x_gap = MAX(part_box.x_gap(nbox), 0);
1560  int y_gap = MAX(part_box.y_gap(nbox), 0);
1561  int n_dist = x_gap * dist_scaling.x() + y_gap* dist_scaling.y();
1562  if (debug) {
1563  tprintf("Part has x-gap=%d, y=%d, dist=%d at:",
1564  x_gap, y_gap, n_dist);
1565  nbox.print();
1566  }
1567  // Truncate the number of boxes, so text doesn't get too much advantage.
1568  int n_boxes = MIN(neighbour->boxes_count(), kSmoothDecisionMargin);
1569  BlobTextFlowType n_flow = neighbour->flow();
1570  GenericVector<int>* count_vector = NULL;
1571  if (n_flow == BTFT_STRONG_CHAIN) {
1572  if (n_type == BRT_TEXT)
1573  count_vector = &dists[NPT_HTEXT];
1574  else
1575  count_vector = &dists[NPT_VTEXT];
1576  if (debug) {
1577  tprintf("%s %d\n", n_type == BRT_TEXT ? "Htext" : "Vtext", n_boxes);
1578  }
1579  } else if ((n_type == BRT_TEXT || n_type == BRT_VERT_TEXT) &&
1580  (n_flow == BTFT_CHAIN || n_flow == BTFT_NEIGHBOURS)) {
1581  // Medium text counts as weak, and all else counts as image.
1582  if (n_type == BRT_TEXT)
1583  count_vector = &dists[NPT_WEAK_HTEXT];
1584  else
1585  count_vector = &dists[NPT_WEAK_VTEXT];
1586  if (debug) tprintf("Weak %d\n", n_boxes);
1587  } else {
1588  count_vector = &dists[NPT_IMAGE];
1589  if (debug) tprintf("Image %d\n", n_boxes);
1590  }
1591  if (count_vector != NULL) {
1592  for (int i = 0; i < n_boxes; ++i)
1593  count_vector->push_back(n_dist);
1594  }
1595  if (debug) {
1596  neighbour->Print();
1597  }
1598  }
1599  for (int i = 0; i < NPT_COUNT; ++i)
1600  dists[i].sort();
1601 }
1602 
1603 // Improves the margins of the part ColPartition by searching for
1604 // neighbours that vertically overlap significantly.
1605 // columns may be NULL, and indicates the assigned column structure this
1606 // is applicable to part.
1607 void ColPartitionGrid::FindPartitionMargins(ColPartitionSet* columns,
1608  ColPartition* part) {
1609  // Set up a rectangle search x-bounded by the column and y by the part.
1610  TBOX box = part->bounding_box();
1611  int y = part->MidY();
1612  // Initial left margin is based on the column, if there is one.
1613  int left_margin = bleft().x();
1614  int right_margin = tright().x();
1615  if (columns != NULL) {
1616  ColPartition* column = columns->ColumnContaining(box.left(), y);
1617  if (column != NULL)
1618  left_margin = column->LeftAtY(y);
1619  column = columns->ColumnContaining(box.right(), y);
1620  if (column != NULL)
1621  right_margin = column->RightAtY(y);
1622  }
1623  left_margin -= kColumnWidthFactor;
1624  right_margin += kColumnWidthFactor;
1625  // Search for ColPartitions that reduce the margin.
1626  left_margin = FindMargin(box.left() + box.height(), true, left_margin,
1627  box.bottom(), box.top(), part);
1628  part->set_left_margin(left_margin);
1629  // Search for ColPartitions that reduce the margin.
1630  right_margin = FindMargin(box.right() - box.height(), false, right_margin,
1631  box.bottom(), box.top(), part);
1632  part->set_right_margin(right_margin);
1633 }
1634 
1635 // Starting at x, and going in the specified direction, upto x_limit, finds
1636 // the margin for the given y range by searching sideways,
1637 // and ignoring not_this.
1638 int ColPartitionGrid::FindMargin(int x, bool right_to_left, int x_limit,
1639  int y_bottom, int y_top,
1640  const ColPartition* not_this) {
1641  int height = y_top - y_bottom;
1642  // Iterate the ColPartitions in the grid.
1643  ColPartitionGridSearch side_search(this);
1644  side_search.SetUniqueMode(true);
1645  side_search.StartSideSearch(x, y_bottom, y_top);
1646  ColPartition* part;
1647  while ((part = side_search.NextSideSearch(right_to_left)) != NULL) {
1648  // Ignore itself.
1649  if (part == not_this) // || part->IsLineType())
1650  continue;
1651  // Must overlap by enough, based on the min of the heights, so
1652  // large partitions can't smash through small ones.
1653  TBOX box = part->bounding_box();
1654  int min_overlap = MIN(height, box.height());
1655  min_overlap = static_cast<int>(min_overlap * kMarginOverlapFraction + 0.5);
1656  int y_overlap = MIN(y_top, box.top()) - MAX(y_bottom, box.bottom());
1657  if (y_overlap < min_overlap)
1658  continue;
1659  // Must be going the right way.
1660  int x_edge = right_to_left ? box.right() : box.left();
1661  if ((x_edge < x) != right_to_left)
1662  continue;
1663  // If we have gone past x_limit, then x_limit will do.
1664  if ((x_edge < x_limit) == right_to_left)
1665  break;
1666  // It reduces x limit, so save the new one.
1667  x_limit = x_edge;
1668  }
1669  return x_limit;
1670 }
1671 
1672 
1673 } // namespace tesseract.