22 #pragma warning(disable:4244) // Conversion warnings
27 #include "config_auto.h"
67 false,
"Show partition bounds");
69 false,
"Show blobs rejected as noise");
71 "Show partition bounds, waiting if >1");
87 TabVector_LIST* vlines, TabVector_LIST* hlines,
88 int vertical_x,
int vertical_y)
89 :
TabFind(gridsize, bleft, tright, vlines, vertical_x, vertical_y,
92 mean_column_gap_(tright.x() - bleft.x()),
93 reskew_(1.0
f, 0.0
f), rotation_(1.0
f, 0.0
f), rerotate_(1.0
f, 0.0
f),
94 best_columns_(
NULL), stroke_width_(
NULL),
95 part_grid_(gridsize, bleft, tright), nontext_map_(
NULL),
96 projection_(resolution),
97 denorm_(
NULL), input_blobs_win_(
NULL), equation_detect_(
NULL) {
98 TabVector_IT h_it(&horizontal_lines_);
99 h_it.add_list_after(hlines);
104 if (best_columns_ !=
NULL) {
105 delete [] best_columns_;
107 if (stroke_width_ !=
NULL)
108 delete stroke_width_;
109 delete input_blobs_win_;
110 pixDestroy(&nontext_map_);
111 while (denorm_ !=
NULL) {
112 DENORM* dead_denorm = denorm_;
119 ColPartition_IT part_it(&noise_parts_);
120 for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) {
127 part_it.set_to_list(&good_parts_);
128 for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) {
136 BLOBNBOX_IT bb_it(&image_bblobs_);
137 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
139 delete bblob->
cblob();
153 if (stroke_width_ !=
NULL)
154 delete stroke_width_;
158 #ifndef GRAPHICS_DISABLED
160 input_blobs_win_ =
MakeWindow(0, 0,
"Filtered Input Blobs");
163 #endif // GRAPHICS_DISABLED
165 pixDestroy(&nontext_map_);
171 photo_mask_pix, input_block);
177 stroke_width_->
Clear();
190 BLOBNBOX_CLIST* osd_blobs) {
206 bool vertical_text_lines,
207 int recognition_rotation) {
208 const FCOORD anticlockwise90(0.0
f, 1.0
f);
209 const FCOORD clockwise90(0.0
f, -1.0
f);
210 const FCOORD rotation180(-1.0
f, 0.0
f);
211 const FCOORD norotation(1.0
f, 0.0
f);
213 text_rotation_ = norotation;
216 rotation_ = norotation;
217 if (recognition_rotation == 1) {
218 rotation_ = anticlockwise90;
219 }
else if (recognition_rotation == 2) {
220 rotation_ = rotation180;
221 }
else if (recognition_rotation == 3) {
222 rotation_ = clockwise90;
228 if (recognition_rotation & 1) {
229 vertical_text_lines = !vertical_text_lines;
235 if (vertical_text_lines) {
236 rotation_.
rotate(anticlockwise90);
237 text_rotation_.
rotate(clockwise90);
240 rerotate_ =
FCOORD(rotation_.
x(), -rotation_.
y());
241 if (rotation_.
x() != 1.0f || rotation_.
y() != 0.0f) {
257 tprintf(
"Vertical=%d, orientation=%d, final rotation=(%f, %f)+(%f,%f)\n",
258 vertical_text_lines, recognition_rotation,
259 rotation_.
x(), rotation_.
y(),
260 text_rotation_.
x(), text_rotation_.
y());
266 0.0
f, 0.0
f, 1.0
f, 1.0
f, 0.0
f, 0.0
f);
285 Pix* scaled_color,
int scaled_factor,
286 TO_BLOCK* input_block, Pix* photo_mask_pix,
287 BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks) {
288 pixOr(photo_mask_pix, photo_mask_pix, nontext_map_);
294 denorm_, &projection_,
295 &part_grid_, &big_parts_);
297 input_block,
this, &part_grid_, &big_parts_);
301 input_block,
this, &part_grid_, &big_parts_);
306 ColPartition_IT p_it(&big_parts_);
307 for (p_it.mark_cycle_pt(); !p_it.cycled_list(); p_it.forward())
308 p_it.data()->DisownBoxes();
310 delete stroke_width_;
311 stroke_width_ =
NULL;
332 ReflectForRtl(input_block, &image_bblobs_);
344 min_gutter_width_, &part_grid_, &deskew_, &reskew_);
348 0.0
f, 0.0
f, 1.0
f, 1.0
f, 0.0
f, 0.0
f);
349 denorm_ = new_denorm;
355 if (!MakeColumns(single_column)) {
363 #ifndef GRAPHICS_DISABLED
368 #endif // GRAPHICS_DISABLED
374 GridSplitPartitions();
378 GridMergePartitions();
381 InsertRemainingNoise(input_block);
383 GridInsertHLinePartitions();
384 GridInsertVLinePartitions();
394 if (equation_detect_) {
409 GridRemoveUnderlinePartitions();
419 #ifndef GRAPHICS_DISABLED
432 #endif // GRAPHICS_DISABLED
437 ReleaseBlobsAndCleanupUnused(input_block);
442 TransformToBlocks(blocks, to_blocks);
444 tprintf(
"Found %d blocks, %d to_blocks\n",
445 blocks->length(), to_blocks->length());
448 DisplayBlocks(blocks);
449 RotateAndReskewBlocks(input_is_rtl, to_blocks);
451 #ifndef GRAPHICS_DISABLED
452 if (blocks_win_ !=
NULL) {
453 bool waiting =
false;
458 if (*event->parameter ==
'd')
470 #endif // GRAPHICS_DISABLED
478 deskew->
set_y(-deskew->
y());
482 equation_detect_ = detect;
// Draws every block in `blocks` into blocks_win_, handing each plot() call an
// incrementing serial number. The drawing code sits behind
// #ifndef GRAPHICS_DISABLED; blocks_win_ is tested against NULL and is
// cleared before the blocks are plotted.
488 void ColumnFinder::DisplayBlocks(BLOCK_LIST* blocks) {
489 #ifndef GRAPHICS_DISABLED
491 if (blocks_win_ ==
NULL)
494 blocks_win_->
Clear();
500 BLOCK_IT block_it(blocks);
502 for (block_it.mark_cycle_pt(); !block_it.cycled_list();
503 block_it.forward()) {
504 BLOCK* block = block_it.data();
505 block->
plot(blocks_win_, serial++,
// Debug display helper whose body is guarded by #ifndef GRAPHICS_DISABLED.
// It reads the column set stored for index i in best_columns_.
// NOTE(review): how `sets` itself is consumed is not evident from the
// statements here -- confirm before relying on it.
516 void ColumnFinder::DisplayColumnBounds(
PartSetVector* sets) {
517 #ifndef GRAPHICS_DISABLED
526 ColPartitionSet* columns = best_columns_[i];
// Builds the candidate column sets in column_sets_ and, when any exist,
// assigns them to the grid rows via AssignColumns().
// When single_column is false, candidates are harvested from the entries of
// part_sets that are non-NULL and LegalColumnCandidate(), then refined by two
// ImproveColumnCandidates() passes (first against column_sets_ itself, then
// against part_sets). Each non-NULL line set finally relinquishes its parts.
// NOTE(review): the caller tests `!MakeColumns(...)`, so false presumably
// signals failure -- the return statements are not shown here; confirm.
535 bool ColumnFinder::MakeColumns(
bool single_column) {
540 if (!single_column) {
// First pass copies with good_only == true; the loop condition below allows
// exactly one retry with good_only == false if column_sets_ is still empty.
545 bool good_only =
true;
548 ColPartitionSet* line_set = part_sets.get(i);
549 if (line_set !=
NULL && line_set->LegalColumnCandidate()) {
550 ColPartitionSet* column_candidate = line_set->Copy(good_only);
551 if (column_candidate !=
NULL)
552 column_candidate->AddToColumnSetsIfUnique(&column_sets_,
WidthCB());
555 good_only = !good_only;
556 }
while (column_sets_.
empty() && !good_only);
558 PrintColumnCandidates(
"Column candidates");
// Source and destination are the same vector here; ImproveColumnCandidates
// handles that aliasing by reading from its temporary copy.
560 ImproveColumnCandidates(&column_sets_, &column_sets_);
562 PrintColumnCandidates(
"Improved columns");
564 ImproveColumnCandidates(&part_sets, &column_sets_);
566 ColPartitionSet* single_column_set =
568 if (single_column_set !=
NULL) {
574 PrintColumnCandidates(
"Final Columns");
575 if (!column_sets_.
empty()) {
577 AssignColumns(part_sets);
579 DisplayColumnBounds(&part_sets);
581 ComputeMeanColumnGap();
582 ColPartition_LIST parts;
583 for (
int i = 0; i < part_sets.size(); ++i) {
584 ColPartitionSet* line_set = part_sets.get(i);
585 if (line_set !=
NULL) {
586 line_set->RelinquishParts();
// Rebuilds *column_sets from improved copies of its current candidates.
// The existing candidates are first moved into temp_cols; each one is then
// copied, improved against src_sets with ImproveColumnCandidate(), and added
// back to column_sets if unique. Copies are made with good_only == true
// first, with a single retry using good_only == false if nothing was added.
599 void ColumnFinder::ImproveColumnCandidates(
PartSetVector* src_sets,
602 temp_cols.
move(column_sets);
// column_sets has just been emptied into temp_cols, so a source that aliases
// it must be redirected to temp_cols.
603 if (src_sets == column_sets)
604 src_sets = &temp_cols;
605 int set_size = temp_cols.size();
607 bool good_only =
true;
609 for (
int i = 0; i < set_size; ++i) {
610 ColPartitionSet* column_candidate = temp_cols.get(i);
612 ColPartitionSet* improved = column_candidate->Copy(good_only);
613 if (improved !=
NULL) {
614 improved->ImproveColumnCandidate(
WidthCB(), src_sets);
615 improved->AddToColumnSetsIfUnique(column_sets,
WidthCB());
618 good_only = !good_only;
619 }
while (column_sets->empty() && !good_only);
// Nothing survived improvement: fall back to the original candidates.
620 if (column_sets->empty())
621 column_sets->move(&temp_cols);
// NOTE(review): confirm whether this cleanup is unconditional or only runs
// when the originals were NOT moved back above.
623 temp_cols.delete_data_pointers();
// Debug helper: prints how many column sets column_sets_ holds, labelled with
// `title`, and then walks over every set in index order.
627 void ColumnFinder::PrintColumnCandidates(
const char* title) {
628 int set_size = column_sets_.
size();
629 tprintf(
"Found %d %s:\n", set_size, title);
631 for (
int i = 0; i < set_size; ++i) {
632 ColPartitionSet* column_set = column_sets_.
get(i);
// Picks a column set from column_sets_ for each of the part_sets.size() rows
// and records the choice in best_columns_ (freshly allocated, all NULL).
// Step 1: build a per-row cost table. A compatible (row, column set) pair
// costs UnmatchedWidth(line_set); any other pair costs MAX_INT32.
// Step 2: while BiggestUnassignedRange() finds a range, take its modal column
// set, shrink the range to the longest run, extend it past small gaps in both
// directions, and assign the column set to it.
// Step 3: if row 0 is still unassigned, column set 0 is assigned as a
// fallback; all scratch arrays are then released.
646 void ColumnFinder::AssignColumns(
const PartSetVector& part_sets) {
647 int set_count = part_sets.size();
650 best_columns_ =
new ColPartitionSet*[set_count];
651 for (
int y = 0; y < set_count; ++y)
652 best_columns_[y] =
NULL;
653 int column_count = column_sets_.
size();
// Scratch arrays with one entry per row; freed at the end of the function.
663 bool* any_columns_possible =
new bool[set_count];
664 int* assigned_costs =
new int[set_count];
665 int** column_set_costs =
new int*[set_count];
668 for (
int part_i = 0; part_i < set_count; ++part_i) {
669 ColPartitionSet* line_set = part_sets.get(part_i);
670 bool debug = line_set !=
NULL &&
672 line_set->bounding_box().bottom());
673 column_set_costs[part_i] =
new int[column_count];
674 any_columns_possible[part_i] =
false;
676 for (
int col_i = 0; col_i < column_count; ++col_i) {
677 if (line_set !=
NULL &&
678 column_sets_.
get(col_i)->CompatibleColumns(debug, line_set,
680 column_set_costs[part_i][col_i] =
681 column_sets_.
get(col_i)->UnmatchedWidth(line_set);
682 any_columns_possible[part_i] =
true;
// Incompatible pairs get the maximum cost, so a strict less-than comparison
// against an assigned cost cannot succeed for them.
684 column_set_costs[part_i][col_i] =
MAX_INT32;
686 tprintf(
"Set id %d did not match at y=%d, lineset =%p\n",
687 col_i, part_i, line_set);
694 while (BiggestUnassignedRange(set_count, any_columns_possible,
697 tprintf(
"Biggest unassigned range = %d- %d\n", start, end);
699 int column_set_id = RangeModalColumnSet(column_set_costs,
700 assigned_costs, start, end);
702 tprintf(
"Range modal column id = %d\n", column_set_id);
703 column_sets_.
get(column_set_id)->Print();
706 ShrinkRangeToLongestRun(column_set_costs, assigned_costs,
707 any_columns_possible,
708 column_set_id, &start, &end);
710 tprintf(
"Shrunk range = %d- %d\n", start, end);
// Grow the start downwards (step -1, limit -1) and then the end upwards
// (step 1, limit set_count).
714 ExtendRangePastSmallGaps(column_set_costs, assigned_costs,
715 any_columns_possible,
716 column_set_id, -1, -1, &start);
718 ExtendRangePastSmallGaps(column_set_costs, assigned_costs,
719 any_columns_possible,
720 column_set_id, 1, set_count, &end);
723 tprintf(
"Column id %d applies to range = %d - %d\n",
724 column_set_id, start, end);
726 AssignColumnToRange(column_set_id, start, end, column_set_costs,
// NOTE(review): this fallback spans [0, gridheight_) although every array
// above is sized by set_count -- presumably the two are equal; confirm.
731 if (best_columns_[0] ==
NULL) {
732 AssignColumnToRange(0, 0, gridheight_, column_set_costs, assigned_costs);
735 for (
int i = 0; i < set_count; ++i) {
736 delete [] column_set_costs[i];
738 delete [] assigned_costs;
739 delete [] any_columns_possible;
740 delete [] column_set_costs;
// Looks for the largest run of rows that still has no column set and reports
// it through *best_start / *best_end. Both outputs are initialised to
// set_count, and the function returns true only if *best_start < *best_end
// afterwards.
// The start scan looks at rows whose best_columns_ entry is NULL and whose
// any_columns_possible flag is set; the end scan tests for an already
// assigned row (best_columns_[end] != NULL) and for any_columns_possible[end].
// NOTE(review): the outer loop is bounded by gridheight_ while the inner
// scans and array indexing are bounded by set_count -- presumably equal;
// confirm.
745 bool ColumnFinder::BiggestUnassignedRange(
int set_count,
746 const bool* any_columns_possible,
747 int* best_start,
int* best_end) {
748 int best_range_size = 0;
749 *best_start = set_count;
750 *best_end = set_count;
752 for (
int start = 0; start <
gridheight_; start = end) {
754 while (start < set_count) {
755 if (best_columns_[start] ==
NULL && any_columns_possible[start])
762 while (end < set_count) {
763 if (best_columns_[end] !=
NULL)
765 if (any_columns_possible[end])
769 if (start < set_count && range_size > best_range_size) {
770 best_range_size = range_size;
775 return *best_start < *best_end;
// Returns the index of the column set that most often improves on the
// currently assigned cost over rows [start, end).
// Every (row, column set) pair with
// column_set_costs[row][col] < assigned_costs[row] adds one count for that
// column-set index to a STATS histogram over [0, column_count); the
// histogram's mode() is the result.
779 int ColumnFinder::RangeModalColumnSet(
int** column_set_costs,
780 const int* assigned_costs,
781 int start,
int end) {
782 int column_count = column_sets_.
size();
783 STATS column_stats(0, column_count);
784 for (
int part_i = start; part_i < end; ++part_i) {
785 for (
int col_j = 0; col_j < column_count; ++col_j) {
786 if (column_set_costs[part_i][col_j] < assigned_costs[part_i])
787 column_stats.add(col_j, 1);
791 return column_stats.mode();
// Narrows [*best_start, *best_end) to the longest sub-run found inside the
// original range for column_set_id. The incoming bounds are saved, both
// outputs are reset to the original end, and the original range is then
// scanned for runs; the longest (end - start) seen so far is tracked in
// best_range_size.
798 void ColumnFinder::ShrinkRangeToLongestRun(
int** column_set_costs,
799 const int* assigned_costs,
800 const bool* any_columns_possible,
802 int* best_start,
int* best_end) {
804 int orig_start = *best_start;
805 int orig_end = *best_end;
806 int best_range_size = 0;
807 *best_start = orig_end;
808 *best_end = orig_end;
810 for (
int start = orig_start; start < orig_end; start = end) {
812 while (start < orig_end) {
813 if (column_set_costs[start][column_set_id] < assigned_costs[start] ||
814 !any_columns_possible[start])
820 while (end < orig_end) {
// NOTE(review): the cost at row `end` is compared with assigned_costs[start],
// not assigned_costs[end]; the other scans in this file compare matching
// indices. Confirm whether assigned_costs[end] was intended.
821 if (column_set_costs[end][column_set_id] >= assigned_costs[start] &&
822 any_columns_possible[end])
826 if (start < orig_end && end - start > best_range_size) {
827 best_range_size = end - start;
// Tries to move *start in direction `step` (the caller passes -1 or +1)
// towards the exclusive limit `end`. Rows are walked from *start + step until
// the limit is reached.
// The first inner loop relates to a "barrier" (its size is logged as
// barrier_size); the second relates to the rows beyond it (logged as
// good_size). In both loops a row is tested with
// column_set_costs[i][column_set_id] < assigned_costs[i] and with
// any_columns_possible[i]. The enclosing do/while repeats for as long as
// good_size >= barrier_size.
// NOTE(review): the statements that update barrier_size, good_size and
// *start are not shown here -- confirm the exact acceptance rule.
837 void ColumnFinder::ExtendRangePastSmallGaps(
int** column_set_costs,
838 const int* assigned_costs,
839 const bool* any_columns_possible,
841 int step,
int end,
int* start) {
843 tprintf(
"Starting expansion at %d, step=%d, limit=%d\n",
848 int barrier_size = 0;
854 for (i = *start + step; i != end; i += step) {
855 if (column_set_costs[i][column_set_id] < assigned_costs[i])
858 if (any_columns_possible[i])
862 tprintf(
"At %d, Barrier size=%d\n", i, barrier_size);
872 for (i += step; i != end; i += step) {
873 if (column_set_costs[i][column_set_id] < assigned_costs[i])
875 else if (any_columns_possible[i])
879 tprintf(
"At %d, good size = %d\n", i, good_size);
881 if (good_size >= barrier_size)
883 }
while (good_size >= barrier_size);
// Assigns column set `column_set_id` to every row in [start, end): for each
// row, assigned_costs[row] is overwritten with that set's cost from
// column_set_costs and best_columns_[row] is pointed at the set.
// The caller must ensure start/end are valid indices for all three arrays.
887 void ColumnFinder::AssignColumnToRange(
int column_set_id,
int start,
int end,
888 int** column_set_costs,
889 int* assigned_costs) {
890 ColPartitionSet* column_set = column_sets_.
get(column_set_id);
891 for (
int i = start; i < end; ++i) {
892 assigned_costs[i] = column_set_costs[i][column_set_id];
893 best_columns_[i] = column_set;
// Sets mean_column_gap_ to the average gap (total_gap / gap_samples) when at
// least one gap sample exists, otherwise to total_width / width_samples.
// NOTE(review): width_samples starts at 0; if the fallback branch is taken
// with no width samples accumulated, this is a division by zero. Confirm the
// accumulation code guarantees width_samples > 0 on that path.
898 void ColumnFinder::ComputeMeanColumnGap() {
902 int width_samples = 0;
910 mean_column_gap_ = gap_samples > 0 ? total_gap / gap_samples
911 : total_width / width_samples;
// File-local helper that loops over `blobs` until the list reports empty()
// and deletes the cblob() of blobs it handles.
// NOTE(review): the loop only terminates if entries are removed inside the
// body, and the condition guarding the delete is not shown here -- confirm
// both against the full body.
919 static void ReleaseAllBlobsAndDeleteUnused(BLOBNBOX_LIST* blobs) {
920 for (BLOBNBOX_IT blob_it(blobs); !blob_it.empty(); blob_it.forward()) {
923 delete blob->
cblob();
// Runs ReleaseAllBlobsAndDeleteUnused() over each blob list of `block`
// (blobs, small_blobs, noise_blobs, large_blobs) and then over this object's
// own image_bblobs_ list.
932 void ColumnFinder::ReleaseBlobsAndCleanupUnused(
TO_BLOCK* block) {
933 ReleaseAllBlobsAndDeleteUnused(&block->
blobs);
934 ReleaseAllBlobsAndDeleteUnused(&block->
small_blobs);
935 ReleaseAllBlobsAndDeleteUnused(&block->
noise_blobs);
936 ReleaseAllBlobsAndDeleteUnused(&block->
large_blobs);
937 ReleaseAllBlobsAndDeleteUnused(&image_bblobs_);
// Walks every partition in part_grid_ and considers splitting those that
// straddle the gap between two adjacent columns.
// For a candidate, the column range is obtained with ColumnRange() using the
// row's best_columns_ entry. A margin box is then squeezed to lie between the
// right edge of first_col (+2) and the left edge of last_col (-2) at the
// partition's mid-y, and that box is searched for blobs. When a split goes
// ahead, the partition is removed from the grid, split at the box's
// horizontal midpoint, and any piece returned by SplitAt() is inserted back
// into part_grid_.
// NOTE(review): the log text suggests a blob found inside the gap blocks the
// split, and that only partitions with last_col == first_col + 1 are
// considered -- the branch bodies are not shown here; confirm.
941 void ColumnFinder::GridSplitPartitions() {
943 GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>
944 gsearch(&part_grid_);
945 gsearch.StartFullSearch();
946 ColPartition* dont_repeat =
NULL;
948 while ((part = gsearch.NextFullSearch()) !=
NULL) {
949 if (part->blob_type() <
BRT_UNKNOWN || part == dont_repeat)
951 ColPartitionSet* column_set = best_columns_[gsearch.GridY()];
955 part->ColumnRange(
resolution_, column_set, &first_col, &last_col);
964 if (last_col != first_col + 1)
967 int y = part->MidY();
968 TBOX margin_box = part->bounding_box();
972 tprintf(
"Considering partition for GridSplit:");
975 ColPartition* column = column_set->GetColumnByIndex(first_col);
978 margin_box.
set_left(column->RightAtY(y) + 2);
979 column = column_set->GetColumnByIndex(last_col);
982 margin_box.
set_right(column->LeftAtY(y) - 2);
986 GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> rectsearch(
this);
988 tprintf(
"Searching box (%d,%d)->(%d,%d)\n",
990 margin_box.
right(), margin_box.
top());
993 rectsearch.StartRectSearch(margin_box);
995 while ((bbox = rectsearch.NextRectSearch()) !=
NULL) {
// The partition leaves the grid before it is modified; the search iterator
// is repositioned further down after the grid has changed.
1001 gsearch.RemoveBBox();
1002 int x_middle = (margin_box.
left() + margin_box.
right()) / 2;
1004 tprintf(
"Splitting part at %d:", x_middle);
1007 ColPartition* split_part = part->SplitAt(x_middle);
1008 if (split_part !=
NULL) {
1012 split_part->Print();
1014 part_grid_.
InsertBBox(
true,
true, split_part);
1018 tprintf(
"Split had no effect\n");
1022 gsearch.RepositionIterator();
1024 tprintf(
"Part cannot be split: blob (%d,%d)->(%d,%d) in column gap\n",
// Walks every partition in part_grid_ and merges it with neighbouring
// partitions that sit in the same column.
// For a partition whose left and right box edges fall in the same column of
// the row's best_columns_ entry, the search box is widened to that column's
// left/right edges at the partition's mid-y. Each neighbour found in the box
// is put through an overlap/type test (VSignificantCoreOverlap and
// TypesMatch), margin tests and a horizontal-gap test that involves
// mean_column_gap_. A neighbour that passes is removed from the grid and
// absorbed into the partition.
// NOTE(review): the outcome of each intermediate test (continue vs. fall
// through) is not shown here -- confirm against the full body.
1033 void ColumnFinder::GridMergePartitions() {
1035 GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>
1036 gsearch(&part_grid_);
1037 gsearch.StartFullSearch();
1039 while ((part = gsearch.NextFullSearch()) !=
NULL) {
1040 if (part->IsUnMergeableType())
1043 ColPartitionSet* columns = best_columns_[gsearch.GridY()];
1044 TBOX box = part->bounding_box();
1047 tprintf(
"Considering part for merge at:");
1050 int y = part->MidY();
1051 ColPartition* left_column = columns->ColumnContaining(box.
left(), y);
1052 ColPartition* right_column = columns->ColumnContaining(box.
right(), y);
1053 if (left_column ==
NULL || right_column != left_column) {
1055 tprintf(
"In different columns\n");
// Widen the search box to the full width of the containing column.
1058 box.
set_left(left_column->LeftAtY(y));
1059 box.
set_right(right_column->RightAtY(y));
1061 bool modified_box =
false;
1062 GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>
1063 rsearch(&part_grid_);
1064 rsearch.SetUniqueMode(
true);
1065 rsearch.StartRectSearch(box);
1066 ColPartition* neighbour;
1068 while ((neighbour = rsearch.NextRectSearch()) !=
NULL) {
1069 if (neighbour == part || neighbour->IsUnMergeableType())
1071 const TBOX& neighbour_box = neighbour->bounding_box();
1073 tprintf(
"Considering merge with neighbour at:");
1076 if (neighbour_box.
right() < box.
left() ||
1079 if (part->VSignificantCoreOverlap(*neighbour) &&
1080 part->TypesMatch(*neighbour)) {
1086 const TBOX& part_box = part->bounding_box();
1089 if (neighbour_box.
left() > part->right_margin() &&
1090 part_box.
right() < neighbour->left_margin())
1092 if (neighbour_box.
right() < part->left_margin() &&
1093 part_box.
left() > neighbour->right_margin())
1095 int h_gap =
MAX(part_box.
left(), neighbour_box.
left()) -
1098 part_box.
width() < mean_column_gap_ ||
1099 neighbour_box.
width() < mean_column_gap_) {
1101 tprintf(
"Running grid-based merge between:\n");
// The neighbour is taken out of the grid before being absorbed, and the
// outer iterator is repositioned because the grid contents changed.
1105 rsearch.RemoveBBox();
1106 gsearch.RepositionIterator();
1107 part->Absorb(neighbour,
WidthCB());
1108 modified_box =
true;
1110 tprintf(
"Neighbour failed hgap test\n");
1113 tprintf(
"Neighbour failed overlap or typesmatch test\n");
1123 gsearch.RemoveBBox();
1125 gsearch.RepositionIterator();
// For each blob visited by blob_it, searches part_grid_ around search_box for
// the mergeable partition with the smallest `distance`. If one is found and
// best_distance is below kMaxDistToPartSizeRatio times that partition's
// median_size(), the blob is added to the partition and the partition is
// inserted into part_grid_.
// NOTE(review): which list of `block` blob_it iterates, and how search_box
// and distance are computed, are not shown here -- confirm.
1132 void ColumnFinder::InsertRemainingNoise(
TO_BLOCK* block) {
1134 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
1142 rsearch.SetUniqueMode(
true);
1143 rsearch.StartRectSearch(search_box);
1145 ColPartition* best_part =
NULL;
1146 int best_distance = 0;
1147 while ((part = rsearch.NextRectSearch()) !=
NULL) {
1148 if (part->IsUnMergeableType())
// The first candidate is always accepted (best_part == NULL); later ones must
// be strictly closer.
1152 if (best_part ==
NULL || distance < best_distance) {
1154 best_distance = distance;
1157 if (best_part !=
NULL &&
1158 best_distance < kMaxDistToPartSizeRatio * best_part->median_size()) {
1161 tprintf(
"Adding noise blob with distance %d, thr=%g:box:",
1169 best_part->AddBox(blob);
1170 part_grid_.
InsertBBox(
true,
true, best_part);
// Builds a TBOX for a horizontal line vector. Vertically the box spans from
// the smaller of the two endpoint y values up to the larger one plus the
// line's mean_width(); horizontally it runs from startpt().x() to
// endpt().x(). A box that would have top == bottom is special-cased.
// NOTE(review): the adjustment made in the top == bottom case is not shown
// here -- confirm.
1184 static TBOX BoxFromHLine(
const TabVector* hline) {
1185 int top =
MAX(hline->startpt().y(), hline->endpt().y());
1186 int bottom =
MIN(hline->startpt().y(), hline->endpt().y());
1187 top += hline->mean_width();
1188 if (top == bottom) {
1194 return TBOX(hline->startpt().x(), bottom, hline->endpt().x(), top);
// Examines each horizontal line in horizontal_lines_ to find lines that act
// as underlines. The partitions covered by a search box derived from the
// line's box are classified into three cases: one sets touched_table; text
// whose median_bottom() lies between line_box.bottom() and search_box.top()
// sets touched_text; and a BRT_HLINE partition fully contained in line_box is
// remembered as line_part.
// The final action is taken only when a line partition was found, no table
// was touched, and text was touched.
// NOTE(review): the test that sets touched_table, the way search_box is
// grown, and the action taken on line_part are not shown here -- confirm.
1199 void ColumnFinder::GridRemoveUnderlinePartitions() {
1200 TabVector_IT hline_it(&horizontal_lines_);
1201 for (hline_it.mark_cycle_pt(); !hline_it.cycled_list(); hline_it.forward()) {
1202 TabVector* hline = hline_it.data();
1203 if (hline->intersects_other_lines())
1205 TBOX line_box = BoxFromHLine(hline);
1206 TBOX search_box = line_box;
1209 part_search.SetUniqueMode(
true);
1210 part_search.StartRectSearch(search_box);
1211 ColPartition* covered;
1212 bool touched_table =
false;
1213 bool touched_text =
false;
1214 ColPartition* line_part =
NULL;
1215 while ((covered = part_search.NextRectSearch()) !=
NULL) {
1217 touched_table =
true;
1219 }
else if (covered->IsTextType()) {
1221 int text_bottom = covered->median_bottom();
1222 if (line_box.
bottom() <= text_bottom && text_bottom <= search_box.
top())
1223 touched_text =
true;
1224 }
else if (covered->blob_type() ==
BRT_HLINE &&
1225 line_box.
contains(covered->bounding_box())) {
1226 line_part = covered;
1229 if (line_part !=
NULL && !touched_table && touched_text) {
// Visits every horizontal line in horizontal_lines_, computes its box with
// BoxFromHLine(), and searches the partitions overlapping that box, checking
// each for IsImageType() (tracked via any_image, initially false).
// NOTE(review): presumably a line that overlaps an image partition is not
// inserted -- the code that consumes any_image is not shown here; confirm.
1237 void ColumnFinder::GridInsertHLinePartitions() {
1238 TabVector_IT hline_it(&horizontal_lines_);
1239 for (hline_it.mark_cycle_pt(); !hline_it.cycled_list(); hline_it.forward()) {
1240 TabVector* hline = hline_it.data();
1241 TBOX line_box = BoxFromHLine(hline);
1246 bool any_image =
false;
1248 part_search.SetUniqueMode(
true);
1249 part_search.StartRectSearch(line_box);
1250 ColPartition* covered;
1251 while ((covered = part_search.NextRectSearch()) !=
NULL) {
1252 if (covered->IsImageType()) {
// Vertical counterpart of GridInsertHLinePartitions(): visits each vertical
// line from vline_it and tests it with IsSeparator(). The horizontal extent
// runs from the smaller endpoint x to the larger endpoint x plus
// mean_width(), with the left == right case special-cased. The partitions
// overlapping the new part's bounding box are then checked for IsImageType()
// (tracked via any_image, initially false).
// NOTE(review): the handling of non-separators, of the degenerate-width
// case, and of any_image is not shown here -- confirm.
1265 void ColumnFinder::GridInsertVLinePartitions() {
1267 for (vline_it.mark_cycle_pt(); !vline_it.cycled_list(); vline_it.forward()) {
1268 TabVector* vline = vline_it.data();
1269 if (!vline->IsSeparator())
1271 int left =
MIN(vline->startpt().x(), vline->endpt().x());
1272 int right =
MAX(vline->startpt().x(), vline->endpt().x());
1273 right += vline->mean_width();
1274 if (left == right) {
1282 left, vline->startpt().
y(), right, vline->endpt().y());
1284 bool any_image =
false;
1286 part_search.SetUniqueMode(
true);
1287 part_search.StartRectSearch(part->bounding_box());
1288 ColPartition* covered;
1289 while ((covered = part_search.NextRectSearch()) !=
NULL) {
1290 if (covered->IsImageType()) {
// Performs a full search of part_grid_ and calls SetPartitionType() on every
// partition, passing resolution_ and the column set that best_columns_ holds
// for the grid row the search is currently on.
1304 void ColumnFinder::SetPartitionTypes() {
1305 GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>
1306 gsearch(&part_grid_);
1307 gsearch.StartFullSearch();
1309 while ((part = gsearch.NextFullSearch()) !=
NULL) {
1310 part->SetPartitionType(
resolution_, best_columns_[gsearch.GridY()]);
// Performs a full search of part_grid_ looking at singleton-partner links.
// If a partition has a SingletonPartner(true), that partner's
// SingletonPartner(false) must point back at the partition: a mismatch is
// logged in detail and then trapped by ASSERT_HOST. If the partition has no
// such partner but has a SingletonPartner(false), SmoothPartnerRun() is
// called on it with column_count * 2 + 1, where the row's column set comes
// from best_columns_.
// NOTE(review): how column_count is derived from column_set is not shown
// here -- confirm.
1316 void ColumnFinder::SmoothPartnerRuns() {
1318 GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>
1319 gsearch(&part_grid_);
1320 gsearch.StartFullSearch();
1322 while ((part = gsearch.NextFullSearch()) !=
NULL) {
1323 ColPartition* partner = part->SingletonPartner(
true);
1324 if (partner !=
NULL) {
// Consistency check: the partner relation is expected to be symmetric.
1325 if (partner->SingletonPartner(
false) != part) {
1326 tprintf(
"Ooops! Partition:(%d partners)",
1327 part->upper_partners()->length());
1329 tprintf(
"has singleton partner:(%d partners",
1330 partner->lower_partners()->length());
1332 tprintf(
"but its singleton partner is:");
1333 if (partner->SingletonPartner(
false) ==
NULL)
1336 partner->SingletonPartner(
false)->Print();
1338 ASSERT_HOST(partner->SingletonPartner(
false) == part);
1339 }
else if (part->SingletonPartner(
false) !=
NULL) {
1340 ColPartitionSet* column_set = best_columns_[gsearch.GridY()];
1342 part->SmoothPartnerRun(column_count * 2 + 1);
// Inserts `part` into temp_list at a position chosen by scanning the list.
// Each existing entry is compared with `part` in three ways: whether it is
// part's SingletonPartner(false); whether its vertical centre (the mean of
// median_bottom and median_top) lies below part's MidY(); and whether it
// neither horizontally overlaps part nor shares the same margins.
// If the scan runs through the whole list, part is appended at the end;
// otherwise it is added before the element where the scan stopped.
// NOTE(review): which of the three tests stop the scan and which skip to the
// next entry is not shown here -- confirm.
1349 void ColumnFinder::AddToTempPartList(ColPartition* part,
1350 ColPartition_CLIST* temp_list) {
1351 int mid_y = part->MidY();
1352 ColPartition_C_IT it(temp_list);
1353 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1354 ColPartition* test_part = it.data();
1357 if (test_part == part->SingletonPartner(
false))
1359 int neighbour_bottom = test_part->median_bottom();
1360 int neighbour_top = test_part->median_top();
1361 int neighbour_y = (neighbour_bottom + neighbour_top) / 2;
1362 if (neighbour_y < mid_y)
1364 if (!part->HOverlaps(*test_part) && !part->WithinSameMargins(*test_part))
1367 if (it.cycled_list()) {
1368 it.add_to_end(part);
1370 it.add_before_stay_put(part);
// Drains temp_list: loops until the list iterator reports empty(), handing
// entries on to a call that also receives &good_parts_ and work_set.
// NOTE(review): the callee and the way entries are removed from the list are
// not shown here -- confirm.
1375 void ColumnFinder::EmptyTempPartList(ColPartition_CLIST* temp_list,
1376 WorkingPartSet_LIST* work_set) {
1377 ColPartition_C_IT it(temp_list);
1378 while (!it.empty()) {
1380 &good_parts_, work_set);
// Converts the partitions in part_grid_ into output blocks.
// Partitions are visited with a full grid search. Whenever the grid row
// changes, the temporary per-row list is flushed into work_set via
// EmptyTempPartList(). Whenever the row's best_columns_ entry differs from
// the current column_set, the new column set is adopted. Partitions either go
// to the end of noise_parts_ or into the temporary list via
// AddToTempPartList(). After the search the temporary list is flushed once
// more, and every WorkingPartSet is extracted from work_set and processed
// with a call that receives &good_parts_, blocks and to_blocks.
// NOTE(review): the condition that routes a partition to noise_parts_ is not
// shown here -- confirm.
1386 void ColumnFinder::TransformToBlocks(BLOCK_LIST* blocks,
1387 TO_BLOCK_LIST* to_blocks) {
1388 WorkingPartSet_LIST work_set;
1389 ColPartitionSet* column_set =
NULL;
1390 ColPartition_IT noise_it(&noise_parts_);
1394 ColPartition_CLIST temp_part_list;
1396 GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>
1397 gsearch(&part_grid_);
1398 gsearch.StartFullSearch();
// -1 cannot match the first grid_y compared below, so the first partition
// always triggers the row-change branch.
1399 int prev_grid_y = -1;
1401 while ((part = gsearch.NextFullSearch()) !=
NULL) {
1402 int grid_y = gsearch.GridY();
1403 if (grid_y != prev_grid_y) {
1404 EmptyTempPartList(&temp_part_list, &work_set);
1405 prev_grid_y = grid_y;
1407 if (best_columns_[grid_y] != column_set) {
1408 column_set = best_columns_[grid_y];
1412 &good_parts_, &work_set);
1414 tprintf(
"Changed column groups at grid index %d, y=%d\n",
1415 gsearch.GridY(), gsearch.GridY() *
gridsize());
1418 noise_it.add_to_end(part);
1420 AddToTempPartList(part, &temp_part_list);
1423 EmptyTempPartList(&temp_part_list, &work_set);
1425 WorkingPartSet_IT work_it(&work_set);
1426 while (!work_it.empty()) {
1427 WorkingPartSet* working_set = work_it.extract();
1429 &good_parts_, blocks, to_blocks);
// File-local helper: calls reflect_box_in_y_axis() on every blob in `bblobs`.
1437 static void ReflectBlobList(BLOBNBOX_LIST* bblobs) {
1438 BLOBNBOX_IT it(bblobs);
1439 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1440 it.data()->reflect_box_in_y_axis();
// Mirrors the input for right-to-left processing: reflects the boxes in
// `bblobs` and in input_block->blobs about the y axis, then makes a newly
// set-up denorm the current denorm_.
// NOTE(review): the -1.0f among the setup arguments presumably encodes the
// x reflection, and other lists of input_block may be reflected by
// statements not shown here -- confirm.
1450 void ColumnFinder::ReflectForRtl(
TO_BLOCK* input_block, BLOBNBOX_LIST* bblobs) {
1451 ReflectBlobList(bblobs);
1452 ReflectBlobList(&input_block->
blobs);
1459 0.0
f, 0.0
f, -1.0
f, 1.0
f, 0.0
f, 0.0
f);
1460 denorm_ = new_denorm;
// Iterates over every TO_BLOCK in `blocks`. For each one the blob rotation is
// obtained from ComputeBlockAndClassifyRotation(), and the block's blobs are
// then visited. Work that depends on the rotation is guarded by a test that
// blob_rotation is not the identity (1, 0). A median blob size is logged per
// block, with the height taken from heights.median() rounded to the nearest
// int.
// NOTE(review): how input_is_rtl and block_index are used is not shown here
// -- confirm.
1475 void ColumnFinder::RotateAndReskewBlocks(
bool input_is_rtl,
1476 TO_BLOCK_LIST* blocks) {
1483 TO_BLOCK_IT it(blocks);
1484 int block_index = 1;
1485 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1499 FCOORD blob_rotation = ComputeBlockAndClassifyRotation(block);
1504 BLOBNBOX_IT blob_it(&to_block->
blobs);
1505 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
1507 if (blob_rotation.
x() != 1.0f || blob_rotation.
y() != 0.0f) {
1515 static_cast<int>(heights.median() + 0.5));
1517 tprintf(
"Block median size = (%d, %d)\n",
// Works out the rotations for `block` and returns the rotation to apply to
// its blobs. classify_rotation starts as text_rotation_. In one branch the
// block rotation is chosen as rerotate_ when rerotate_.x() is 0, or (0, -1)
// otherwise; the block is rotated by it and classify_rotation is reset to
// the identity (1, 0). block_rotation is then further rotated by rotation_.
// The returned blob_rotation is a copy of block_rotation taken BEFORE
// block_rotation's y component is negated.
// NOTE(review): the condition selecting the branch above and the code that
// stores the rotations on the block are not shown here -- confirm.
1527 FCOORD ColumnFinder::ComputeBlockAndClassifyRotation(
BLOCK* block) {
1536 FCOORD classify_rotation(text_rotation_);
1543 if (rerotate_.
x() == 0.0f)
1544 block_rotation = rerotate_;
1546 block_rotation =
FCOORD(0.0
f, -1.0
f);
1547 block->
rotate(block_rotation);
1548 classify_rotation =
FCOORD(1.0
f, 0.0
f);
1550 block_rotation.rotate(rotation_);
// Snapshot for the return value; the negation on the next line affects only
// block_rotation.
1554 FCOORD blob_rotation(block_rotation);
1555 block_rotation.set_y(-block_rotation.y());
1559 tprintf(
"Blk %d, type %d rerotation(%.2f, %.2f), char(%.2f,%.2f), box:",
1562 classify_rotation.x(), classify_rotation.y());
1564 return blob_rotation;