tesseract  3.04.00
tesshelper Namespace Reference

Functions

def getProjectfiles (libTessDir, libProjectFile, nTrimChars)
 
def getTessLibFiles (tessDir, nTrimChars)
 
def tessCompare (tessDir)
 
def tessReport (tessDir)
 
def copyIncludes (fileSet, description, tessDir, includeDir)
 
def tessCopy (tessDir, includeDir)
 
def tessClean (tessDir)
 
def validateTessDir (tessDir)
 
def validateDir (dir)
 
def main ()
 

Variables

string epilogStr = r""
 
string VERSION = "1.0 %s"
 
string PROJ_SUBDIR = r"vs2008\libtesseract"
 
string PROJFILE = "libtesseract.vcproj"
 
string NEWHEADERS_FILENAME = "newheaders.txt"
 
string NEWSOURCES_FILENAME = "newsources.txt"
 
 fileNodeTemplate = \
 

Function Documentation

def tesshelper.copyIncludes (   fileSet,
  description,
  tessDir,
  includeDir 
)
Copy set of files to specified include dir.

Definition at line 265 of file tesshelper.py.

def copyIncludes(fileSet, description, tessDir, includeDir):
    """Copy set of files to specified include dir.

    Args:
        fileSet: iterable of file paths, each relative to tessDir.
        description: label used only in the progress banner.
        tessDir: directory the relative paths are resolved against.
        includeDir: destination passed to shutil.copy2 for every file.

    Files that do not exist are reported and skipped; a summary of the
    copied and the missing files is printed at the end.
    """

    print()
    print('Copying libtesseract "%s" headers to %s' % (description, includeDir))
    print()

    count = 0
    errList = []
    # Sorted so the progress output is stable regardless of set ordering.
    for includeFile in sorted(fileSet):
        filepath = os.path.join(tessDir, includeFile)
        if os.path.isfile(filepath):
            shutil.copy2(filepath, includeDir)
            print("Copied: %s" % includeFile)
            count += 1
        else:
            print('***Error: "%s" doesn\'t exist' % filepath)
            errList.append(filepath)

    print('%d header files successfully copied to "%s"' % (count, includeDir))
    if errList:
        # The count must be supplied here; without it a literal "%d" is shown.
        print("The following %d files were not copied:" % len(errList))
        for filepath in errList:
            print(" %s" % filepath)
292 
def copyIncludes(fileSet, description, tessDir, includeDir)
Definition: tesshelper.py:265
def tesshelper.getProjectfiles (   libTessDir,
  libProjectFile,
  nTrimChars 
)
Return sets of all, h, c, and resource files in the libtesseract Project (in that order).

Definition at line 84 of file tesshelper.py.

def getProjectfiles(libTessDir, libProjectFile, nTrimChars):
    """Return sets of all, h, c, and resource files in libtesseract Project.

    Args:
        libTessDir: directory the project's Include="..." paths are joined to.
        libProjectFile: path of the Visual Studio project file to scan.
        nTrimChars: number of leading characters removed from each absolute
            path to turn it into a path relative to the tesseract root.

    Returns:
        A 4-tuple (projectFilesSet, projectHFiles, projectCFiles,
        projectRFiles). projectFilesSet holds the trimmed, lower-cased
        paths of every matched entry; the other three hold the Include
        strings exactly as they appear in the project file, grouped by
        extension (.h / .c and .cpp / .rc).
    """

    #extract filenames of header & source files from the .vcproj
    projectCFiles = set()
    projectHFiles = set()
    projectRFiles = set()
    projectFilesSet = set()

    # Context manager so the handle is released even if read() raises.
    with open(libProjectFile, "r") as f:
        data = f.read()

    # Only entries whose path starts with "." (i.e. relative paths) match.
    projectFiles = re.findall(r'(?i)Include="(\.[^"]+)"', data)
    for projectFile in projectFiles:
        ext = os.path.splitext(projectFile.lower())[1]
        if ext in (".c", ".cpp"):
            projectCFiles.add(projectFile)
        elif ext == ".h":
            projectHFiles.add(projectFile)
        elif ext == ".rc":
            projectRFiles.add(projectFile)
        else:
            print("unknown file type: %s" % projectFile)

        # Unknown types are still recorded in the "all files" set.
        relativePath = os.path.abspath(os.path.join(libTessDir, projectFile))
        projectFilesSet.add(relativePath[nTrimChars:].lower())

    return projectFilesSet, projectHFiles, projectCFiles, projectRFiles
114 
def getProjectfiles(libTessDir, libProjectFile, nTrimChars)
Definition: tesshelper.py:84
def tesshelper.getTessLibFiles (   tessDir,
  nTrimChars 
)
Return set of all libtesseract files in tessDir

Definition at line 115 of file tesshelper.py.

def getTessLibFiles(tessDir, nTrimChars):
    """Return set of all libtesseract files in tessDir.

    Args:
        tessDir: root of the tesseract source tree.
        nTrimChars: number of leading characters removed from each absolute
            path to make it relative to the tesseract root.

    Returns:
        Set of lower-cased relative paths of every *.c, *.cpp and *.h file
        found directly inside the known library directories (the search
        does not descend into sub-directories).
    """

    # Nested entries are built with os.path.join so the lookup uses the
    # platform's separator instead of a hard-coded backslash.
    libDirs = [
        "api",
        "ccmain",
        "ccstruct",
        "ccutil",
        "classify",
        "cube",
        "cutil",
        "dict",
        os.path.join("neural_networks", "runtime"),
        "opencl",
        "textord",
        "viewer",
        "wordrec",
        #"training",
        os.path.join("vs2010", "port"),
        os.path.join("vs2010", "libtesseract"),
    ]

    #create list of all .h, .c, .cpp files in "library" directories
    tessFiles = set()
    for curDir in libDirs:
        baseDir = os.path.join(tessDir, curDir)
        for filetype in ("*.c", "*.cpp", "*.h"):
            for curFile in glob.glob(os.path.join(baseDir, filetype)):
                relativePath = os.path.abspath(curFile)[nTrimChars:].lower()
                tessFiles.add(relativePath)

    return tessFiles
150 
151 # ====================================================================
152 
def getTessLibFiles(tessDir, nTrimChars)
Definition: tesshelper.py:115
def tesshelper.main ( )

Definition at line 459 of file tesshelper.py.

def main ():
    """Parse the command line and run the selected tesshelper command.

    Commands are compare, report, copy and clean; when only the tessDir
    argument is given, compare is used.
    """
    argParser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=epilogStr)

    argParser.add_argument("--version", action="version",
                           version="%(prog)s " + VERSION)
    argParser.add_argument('tessDir', type=validateTessDir,
                           help="tesseract installation directory")

    commands = argParser.add_subparsers(
        title="Commands",
        dest="subparser_name")

    def addCommand(name, helpText, handler):
        # Register one sub-command and bind the function that implements it.
        subParser = commands.add_parser(name, help=helpText)
        subParser.set_defaults(func=handler)
        return subParser

    addCommand('compare',
               "compare libtesseract Project with tessDir",
               tessCompare)
    addCommand('report',
               "report libtesseract summary stats",
               tessReport)
    copyCommand = addCommand(
        'copy',
        "copy public libtesseract header files to includeDir",
        tessCopy)
    copyCommand.add_argument('includeDir', type=validateDir,
                             help="Directory to copy header files to.")
    addCommand('clean',
               "clean vs2008 folder of build folders and .user files",
               tessClean)

    #kludge because argparse has no ability to set default subparser
    if len(sys.argv) == 2:
        sys.argv.append("compare")
    args = argParser.parse_args()

    #handle commands: only copy takes the extra includeDir argument
    callArgs = [args.tessDir]
    if args.func == tessCopy:
        callArgs.append(args.includeDir)
    args.func(*callArgs)
500 
def main()
Definition: tesshelper.py:459
def tesshelper.tessClean (   tessDir)
Clean vs2008 folder of all build directories and certain temp files.

Definition at line 382 of file tesshelper.py.

def tessClean(tessDir):
    '''Clean vs2008 folder of all build directories and certain temp files.

    Asks for confirmation on stdin first, then asks whether to only list
    the items (the default) or actually delete them.

    Args:
        tessDir: tesseract root directory; its "vs2008" sub-folder is cleaned.

    Removed (or listed):
      * every LIB_Release / LIB_Debug / DLL_Release / DLL_Debug directory,
      * every file directly in the vs2008 folder except tesseract.sln,
        tesshelper.py and readme.txt,
      * in sub-folders, files ending in .suo, .ncb, .user or whose
        extension ends with "~".
    '''

    vs2008AbsDir = os.path.abspath(os.path.join(tessDir, "vs2008"))

    # input() already returns the typed text in Python 3. Wrapping it in
    # eval() made a plain "yes" raise NameError and executed whatever the
    # user typed, so the reply is now used as-is.
    answer = input(
        'Are you sure you want to clean the\n "%s" folder (Yes/No) [No]? ' %
        vs2008AbsDir)
    if answer.strip().lower() != "yes":
        return
    answer = input('Only list the items to be deleted (Yes/No) [Yes]? ')
    listOnly = answer.strip().lower() != "no"

    def discard(path, remover):
        # Report the item and, unless in list-only mode, delete it.
        if listOnly:
            print("Would remove: %s" % path)
        else:
            print("Removing: %s" % path)
            remover(path)

    buildDirs = ("LIB_Release", "LIB_Debug", "DLL_Release", "DLL_Debug")
    keepFiles = ("tesseract.sln", "tesshelper.py", "readme.txt")
    tempExts = (".suo", ".ncb", ".user")

    for rootDir, dirs, files in os.walk(vs2008AbsDir):
        for buildDir in buildDirs:
            if buildDir in dirs:
                # Prune in place so os.walk does not descend into it.
                dirs.remove(buildDir)
                discard(os.path.join(rootDir, buildDir), shutil.rmtree)

        if rootDir == vs2008AbsDir:
            # Top level: everything except the few known keepers goes.
            for fileName in files:
                if fileName.lower() not in keepFiles:
                    discard(os.path.join(rootDir, fileName), os.remove)
        else:
            # Sub-folders: only per-user / temporary IDE files go.
            for fileName in files:
                ext = os.path.splitext(fileName)[1]
                if ext.lower() in tempExts or ext.endswith("~"):
                    discard(os.path.join(rootDir, fileName), os.remove)
433 
434 # ====================================================================
435 
def tessClean(tessDir)
Definition: tesshelper.py:382
def tesshelper.tessCompare (   tessDir)
Compare libtesseract Project files and actual "sub-library" files.

Definition at line 153 of file tesshelper.py.

def tessCompare(tessDir):
    '''Compare libtesseract Project files and actual "sub-library" files.

    Prints the files present on disk but missing from the VS2010 project
    ("extra") and the files listed in the project but missing on disk
    ("dead"). For the extra files, one fileNodeTemplate entry per file is
    written to NEWHEADERS_FILENAME (for .h files) and NEWSOURCES_FILENAME
    (for everything else); both are opened relative to the current
    working directory.

    Args:
        tessDir: tesseract root directory.
    '''

    vs2010Dir = os.path.join(tessDir, "vs2010")
    libTessDir = os.path.join(vs2010Dir, "libtesseract")
    libProjectFile = os.path.join(libTessDir, "libtesseract.vcxproj")
    tessAbsDir = os.path.abspath(tessDir)
    # +1 also drops the path separator that follows the root directory.
    nTrimChars = len(tessAbsDir) + 1
    print('Comparing VS2010 Project "%s" with\n "%s"' % (libProjectFile,
                                                          tessAbsDir))

    projectFilesSet, projectHFiles, projectCFiles, projectRFiles = \
        getProjectfiles(libTessDir, libProjectFile, nTrimChars)
    tessFiles = getTessLibFiles(tessDir, nTrimChars)

    extraFiles = tessFiles - projectFilesSet
    print("%2d Extra files (in %s but not in Project)" % (len(extraFiles),
                                                         tessAbsDir))
    headerFiles = []
    sourceFiles = []
    # Iterating in sorted order leaves both lists sorted as well.
    for filename in sorted(extraFiles):
        ext = os.path.splitext(filename.lower())[1]
        if ext == ".h":
            headerFiles.append(filename)
        else:
            sourceFiles.append(filename)
        print(" %s " % filename)

    print()
    print("%2d new header file items written to %s" % (len(headerFiles),
                                                      NEWHEADERS_FILENAME))
    with open(NEWHEADERS_FILENAME, "w") as f:
        for filename in headerFiles:
            f.write(fileNodeTemplate % filename)

    print("%2d new source file items written to %s" % (len(sourceFiles),
                                                      NEWSOURCES_FILENAME))
    with open(NEWSOURCES_FILENAME, "w") as f:
        for filename in sourceFiles:
            f.write(fileNodeTemplate % filename)
    print()

    deadFiles = projectFilesSet - tessFiles
    # Closing parenthesis added so this heading matches the "Extra files" one.
    print("%2d Dead files (in Project but not in %s)" % (len(deadFiles),
                                                        tessAbsDir))
    for filename in sorted(deadFiles):
        print(" %s " % filename)
206 
207 # ====================================================================
208 
def getTessLibFiles(tessDir, nTrimChars)
Definition: tesshelper.py:115
def getProjectfiles(libTessDir, libProjectFile, nTrimChars)
Definition: tesshelper.py:84
def tessCompare(tessDir)
Definition: tesshelper.py:153
def tesshelper.tessCopy (   tessDir,
  includeDir 
)
Copy all "public" libtesseract Project header files to include directory.

Preserves directory hierarchy.

Definition at line 293 of file tesshelper.py.

def tessCopy(tessDir, includeDir):
    '''Copy all "public" libtesseract Project header files to include directory.

    The public headers are copied into a "tesseract" folder under
    includeDir (created when missing); the version-number property sheets
    are copied into includeDir itself. Nothing is copied when a regular
    file named "tesseract" already occupies that location.
    '''

    # Headers needed by the base API.
    apiHeaders = {
        r"api\baseapi.h",
        r"api\capi.h",
        r"api\apitypes.h",
        r"ccstruct\publictypes.h",
        r"ccmain\thresholder.h",
        r"ccutil\host.h",
        r"ccutil\basedir.h",
        r"ccutil\tesscallback.h",
        r"ccutil\unichar.h",
        r"ccutil\platform.h",
        }

    # Headers pulled in by strngs.h.
    strngHeaders = {
        r"ccutil\strngs.h",
        r"ccutil\memry.h",
        r"ccutil\host.h",
        r"ccutil\serialis.h",
        r"ccutil\errcode.h",
        r"ccutil\fileerr.h",
        #r"ccutil\genericvector.h",
        }

    # Headers pulled in by the result iterators.
    iteratorHeaders = {
        r"ccmain\ltrresultiterator.h",
        r"ccmain\pageiterator.h",
        r"ccmain\resultiterator.h",
        r"ccutil\genericvector.h",
        r"ccutil\tesscallback.h",
        r"ccutil\errcode.h",
        r"ccutil\host.h",
        r"ccutil\helpers.h",
        r"ccutil\ndminx.h",
        r"ccutil\params.h",
        r"ccutil\unicharmap.h",
        r"ccutil\unicharset.h",
        }

    # Not part of the copied set at present; kept for reference.
    vectorHeaders = {
        r"ccutil\genericvector.h",
        r"ccutil\tesscallback.h",
        r"ccutil\errcode.h",
        r"ccutil\host.h",
        r"ccutil\helpers.h",
        r"ccutil\ndminx.h",
        }

    # Not part of the copied set at present; kept for reference.
    blobHeaders = {
        r"ccstruct\blobs.h",
        r"ccstruct\rect.h",
        r"ccstruct\points.h",
        r"ccstruct\ipoints.h",
        r"ccutil\elst.h",
        r"ccutil\host.h",
        r"ccutil\serialis.h",
        r"ccutil\lsterr.h",
        r"ccutil\ndminx.h",
        r"ccutil\tprintf.h",
        r"ccutil\params.h",
        r"viewer\scrollview.h",
        r"ccstruct\vecfuncs.h",
        }

    # Property sheets that go next to, not inside, the tesseract folder.
    propertySheets = {
        #r"vs2008\include\stdint.h",
        r"vs2008\include\leptonica_versionnumbers.vsprops",
        r"vs2008\include\tesseract_versionnumbers.vsprops",
        }

    targetDir = os.path.join(includeDir, "tesseract")
    if os.path.isfile(targetDir):
        print('Aborting: "%s" is a file not a directory.' % targetDir)
        return
    if not os.path.exists(targetDir):
        os.mkdir(targetDir)

    #publicHeaders = apiHeaders | strngHeaders | vectorHeaders | blobHeaders
    publicHeaders = apiHeaders.union(strngHeaders, iteratorHeaders)

    copyIncludes(publicHeaders, "public", tessDir, targetDir)
    copyIncludes(propertySheets, "extra", tessDir, includeDir)
379 
380 # ====================================================================
381 
def tessCopy(tessDir, includeDir)
Definition: tesshelper.py:293
def copyIncludes(fileSet, description, tessDir, includeDir)
Definition: tesshelper.py:265
def tesshelper.tessReport (   tessDir)
Report summary stats on "sub-library" files and libtesseract Project file.

Definition at line 209 of file tesshelper.py.

def tessReport(tessDir):
    """Report summary stats on "sub-library" files and libtesseract Project file.

    Prints a per-folder table of .h and .cpp file counts for the library
    directories, followed by the header / source / resource counts of the
    project file.
    """

    projectDir = os.path.join(tessDir, "vs2008", "libtesseract")
    libProjectFile = os.path.join(projectDir, "libtesseract.vcproj")
    tessAbsDir = os.path.abspath(tessDir)
    # +1 also drops the path separator that follows the root directory.
    nTrimChars = len(tessAbsDir) + 1

    projectFilesSet, projectHFiles, projectCFiles, projectRFiles = \
        getProjectfiles(projectDir, libProjectFile, nTrimChars)
    tessFiles = getTessLibFiles(tessDir, nTrimChars)

    print('Summary stats for "%s" library directories' % tessAbsDir)

    # folder -> Counter keyed by extension without the leading dot
    perFolder = {}
    for tessFile in tessFiles:
        folder, baseName = os.path.split(tessFile.lower())
        extension = os.path.splitext(baseName)[1]
        if folder not in perFolder:
            perFolder[folder] = collections.Counter()
        perFolder[folder][extension[1:]] += 1

    grandTotal = 0
    grandH = 0
    grandCPP = 0

    print()
    print(" total h cpp")
    print(" ----- --- ---")
    for folder in sorted(perFolder):
        nHFiles = perFolder[folder]['h']
        nCPPFiles = perFolder[folder]['cpp']
        subTotal = nHFiles + nCPPFiles

        grandH += nHFiles
        grandCPP += nCPPFiles
        grandTotal += subTotal

        print(" %5d %3d %3d %s" % (subTotal, nHFiles, nCPPFiles, folder))
    print(" ----- --- ---")
    print(" %5d %3d %3d" % (grandTotal, grandH, grandCPP))

    print()
    print('Summary stats for VS2008 Project "%s"' % libProjectFile)
    projectCounts = (
        (projectHFiles, "Header files"),
        (projectCFiles, "Source files"),
        (projectRFiles, "Resource files"),
    )
    for fileGroup, label in projectCounts:
        print(" %5d %s" % (len(fileGroup), label))
    print(" -----")
    print(" %5d" % sum(len(fileGroup) for fileGroup, label in projectCounts))
262 
263 # ====================================================================
264 
def getTessLibFiles(tessDir, nTrimChars)
Definition: tesshelper.py:115
def getProjectfiles(libTessDir, libProjectFile, nTrimChars)
Definition: tesshelper.py:84
def tessReport(tessDir)
Definition: tesshelper.py:209
def tesshelper.validateDir (   dir)
Check that dir is a valid directory named include.

Definition at line 446 of file tesshelper.py.

def validateDir(dir):
    """Check that dir is a valid directory named include.

    Used as an argparse ``type=`` callable: returns dir unchanged when it
    is an existing directory whose last path component is "include"
    (compared case-insensitively), otherwise raises
    argparse.ArgumentTypeError.
    """

    if os.path.isdir(dir):
        lastComponent = os.path.basename(os.path.abspath(dir))
        if lastComponent.lower() == "include":
            return dir
        raise argparse.ArgumentTypeError(
            'Include directory "%s" must be named "include".' % lastComponent)

    raise argparse.ArgumentTypeError('Directory "%s" doesn\'t exist.' % dir)
458 
def validateDir(dir)
Definition: tesshelper.py:446
def tesshelper.validateTessDir (   tessDir)
Check that tessDir is a valid tesseract directory.

Definition at line 436 of file tesshelper.py.

def validateTessDir(tessDir):
    """Check that tessDir is a valid tesseract directory.

    Used as an argparse ``type=`` callable: returns tessDir unchanged when
    the directory exists and contains PROJ_SUBDIR/PROJFILE, otherwise
    raises argparse.ArgumentTypeError.
    """

    if os.path.isdir(tessDir):
        projFile = os.path.join(tessDir, PROJ_SUBDIR, PROJFILE)
        if os.path.isfile(projFile):
            return tessDir
        raise argparse.ArgumentTypeError(
            'Project file "%s" doesn\'t exist.' % projFile)

    raise argparse.ArgumentTypeError(
        'Directory "%s" doesn\'t exist.' % tessDir)
445 
def validateTessDir(tessDir)
Definition: tesshelper.py:436

Variable Documentation

string tesshelper.epilogStr = r""

Definition at line 31 of file tesshelper.py.

tesshelper.fileNodeTemplate = \

Definition at line 79 of file tesshelper.py.

string tesshelper.NEWHEADERS_FILENAME = "newheaders.txt"

Definition at line 76 of file tesshelper.py.

string tesshelper.NEWSOURCES_FILENAME = "newsources.txt"

Definition at line 77 of file tesshelper.py.

string tesshelper.PROJ_SUBDIR = r"vs2008\libtesseract"

Definition at line 73 of file tesshelper.py.

string tesshelper.PROJFILE = "libtesseract.vcproj"

Definition at line 74 of file tesshelper.py.

string tesshelper.VERSION = "1.0 %s"

Definition at line 72 of file tesshelper.py.