def main(argv):
    """Visualize the book found in the current directory.

    ``argv`` is the list of command-line arguments (program name excluded).
    If the first argument is ``-h`` the usage text is shown and the process
    exits with status 0.  Otherwise the book id is inferred from the current
    directory, the ``./<outdir>/`` output directory is created when missing,
    and the book is handed to ``visualize``.

    Exits with status -1 when no book can be inferred.
    """
    if argv and argv[0] == '-h':
        usage()
        sys.exit(0)

    # infer_book_id() reports "nothing found" by returning None; stop with a
    # message rather than constructing a Book around a missing id.
    book_id = iarchive.infer_book_id()
    if book_id is None:
        # Single-argument print(...) behaves the same under the Python 2
        # print statement.
        print('No book found in current directory')
        usage()
        sys.exit(-1)

    out_path = './' + outdir + '/'
    if not os.path.isdir(out_path):
        os.mkdir(out_path)

    iabook = iarchive.Book(book_id, '', '.')
    visualize(iabook)
def _normalize_toc(toc):
    """Convert a decoded ``--toc`` value to a ``{pagenum: chapter}`` dict.

    Two shapes are accepted:

    * the openlibrary format: a list of items carrying ``pagenum`` plus
      ``label`` and/or ``title``.  Items without ``pagenum``, or with
      neither text field, are skipped.  When both fields are present the
      chapter string is ``'<label> - <title>'``.
    * the original bespoke format: a dict of pagenum -> title, which is
      returned unchanged.

    Returns None when walking the value raises TypeError (i.e. it does not
    look like either shape).
    """
    if isinstance(toc, dict):
        # NOTE(review): the previous code probed ``toc[0]`` and reset the toc
        # to None on KeyError, which silently dropped the dict format that
        # the accompanying comment said was accepted.
        return toc
    try:
        chapters = {}
        for item in toc:
            if 'pagenum' not in item:
                continue
            if 'label' in item and 'title' in item:
                chapter = '%s - %s' % (item['label'], item['title'])
            elif 'label' in item:
                chapter = item['label']
            elif 'title' in item:
                chapter = item['title']
            else:
                continue
            chapters[item['pagenum']] = chapter
        return chapters
    except TypeError:
        return None


def _capture_stdout(command):
    """Run *command* (an argument list, no shell) and return its stdout.

    Passing a list avoids shell interpretation, so file names containing
    spaces or shell metacharacters reach the program intact.  If the
    program cannot be started, a note is written to stderr and an empty
    string is returned.
    """
    import subprocess

    try:
        proc = subprocess.Popen(command, stdout=subprocess.PIPE,
                                universal_newlines=True)
    except OSError as err:
        sys.stderr.write('could not run %s: %s\n' % (command[0], err))
        return ''
    return proc.communicate()[0]


def main(argv):
    """Command-line entry point: convert a scanned book to an ebook format.

    ``argv`` is the argument list without the program name.

    Options:
        -h, --help      show usage and exit
        -d, --debug     after writing the book, run a validator on it
                        (ZedVal for daisy, epubcheck otherwise) and print
                        the validator's output
        -o, --outfile   output file name
        --document      document name passed to ``iarchive.Book``; its
                        basename also seeds the default output name
        --toc           JSON table of contents (see ``_normalize_toc``)
        --daisy, --epub, --test, --report, --hocr
                        output selector; epub is the default.  When several
                        are given, the precedence is daisy, test, report,
                        hocr, epub.

    Positional arguments: ``[book_id [book_path [out_name]]]``.  With none,
    the book id is inferred from the current directory.  With only
    ``book_id``, a directory of that name must exist and is used as the
    book path.

    Exits with status -1 on bad options or arguments.  ``--test`` and
    ``--report`` print their result and exit with status 0.  ``--hocr``
    raises NotImplementedError.
    """
    import getopt
    import shutil

    try:
        opts, args = getopt.getopt(argv, 'dho:',
                                   ['debug', 'help', 'outfile=',
                                    'document=', 'daisy', 'epub',
                                    'test', 'report', 'hocr', 'toc='])
    except getopt.GetoptError:
        usage()
        sys.exit(-1)

    out_name = None
    debug_output = False
    found_output_opt = False
    make_epub = False
    make_daisy = False
    make_test = False
    make_report = False
    make_hocr = False
    toc = None
    doc = ''

    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage()
            sys.exit()
        elif opt in ('-d', '--debug'):
            debug_output = True
        # Single long options are compared with ==; ``opt in ('--daisy')``
        # is a substring test against a plain string, not tuple membership.
        elif opt == '--daisy':
            make_daisy = True
            found_output_opt = True
        elif opt == '--epub':
            make_epub = True
            found_output_opt = True
        elif opt == '--test':
            make_test = True
            found_output_opt = True
        elif opt == '--report':
            make_report = True
            found_output_opt = True
        elif opt == '--hocr':
            make_hocr = True
            found_output_opt = True
        elif opt in ('-o', '--outfile'):
            out_name = arg
        elif opt == '--document':
            doc = arg
        elif opt == '--toc':
            if arg:
                toc = json.loads(arg)
                if toc is not None and len(toc) > 0:
                    toc = _normalize_toc(toc)

    if not found_output_opt:
        make_epub = True

    # Single-argument print(...) behaves the same under the Python 2 print
    # statement.
    if len(args) == 0:
        book_id = iarchive.infer_book_id()
        if book_id is None:
            print('No args given and no book found in current directory')
            usage()
            sys.exit(-1)
        book_path = '.'
    elif len(args) == 1:
        book_id = args[0]
        if not os.path.exists(book_id):
            print('Only book_id arg given, and no corresponding book dir found')
            usage()
            sys.exit(-1)
        book_path = book_id
    elif len(args) == 2:
        book_id = args[0]
        book_path = args[1]
    elif len(args) == 3:
        if out_name is not None:
            print('outfile found as 3rd argument, but outfile is already specified via -o')
            usage()
            sys.exit(-1)
        book_id = args[0]
        book_path = args[1]
        out_name = args[2]
    else:
        # args[3:] is a list; join it so the message can actually be built
        # (str + list raises TypeError).
        print('unrecognized extra arguments ' + ' '.join(args[3:]))
        usage()
        sys.exit(-1)

    if out_name is None:
        out_root = os.path.basename(doc) if doc else book_id
        if make_daisy:
            out_name = out_root + '_daisy.zip'
        elif make_test:
            out_name = out_root + '.test'
        elif make_report:
            out_name = out_root + '.report'
        elif make_hocr:
            out_name = out_root + '.html'
        else:
            out_name = out_root + '.epub'

    iabook = iarchive.Book(book_id, doc, book_path, toc=toc)
    metadata = iabook.get_metadata()

    if make_daisy:
        ebook = daisy.Book(out_name, metadata)
        iabook_to_daisy.process_book(iabook, ebook)
    elif make_test:
        print(iabook.analyze())
        sys.exit(0)
    elif make_report:
        print(iabook.report())
        sys.exit(0)
    elif make_hocr:
        # hocr output is not implemented yet.  A real exception class is
        # required here: raising a bare string is itself a TypeError.
        raise NotImplementedError('NYI')
    else:
        ebook = epub.Book(out_name, metadata)
        iabook_to_epub.process_book(iabook, ebook)
    ebook.finish(metadata)

    if debug_output:
        if make_daisy:
            # Unpack the freshly written zip into a clean directory and
            # validate its .opf file with ZedVal.
            shutil.rmtree('daisy_debug', ignore_errors=True)
            _capture_stdout(['unzip', '-d', 'daisy_debug', out_name])
            zedval = os.path.join(sys.path[0], 'Zedval/ZedVal.jar')
            opf_file = os.path.join('daisy_debug',
                                    iabook.get_book_id() + '_daisy.opf')
            command = ['java', '-Xms128m', '-Xmx256m', '-jar',
                       zedval, opf_file]
        else:
            epubcheck = os.path.join(sys.path[0],
                                     'epubcheck/epubcheck-1.1.jar')
            command = ['java', '-jar', epubcheck, out_name]
        print(_capture_stdout(command))
def main(argv):
    """Command-line entry point for the OCR coordinate visualizer.

    Parses ``argv`` (argument list without the program name) with optparse,
    validates and normalizes the options, stores them in the module-level
    ``opts`` global, and then visualizes the book found in the current
    directory into ``./<outdir>/`` (created when missing).

    Normalization applied to the parsed options:

    * ``--reduce`` must be in 0..4.
    * ``--scale`` left at 0 becomes ``2 ** reduce``.
    * ``--leaf`` sets both ``first`` and ``last`` and may not be combined
      with ``--first`` or ``--last``.
    * ``--source=djvu`` is rejected.
    * ``--outdir`` defaults to ``<source>_viz``.

    Invalid options are reported through ``parser.error`` (usage message on
    stderr, exit status 2), as is the case where no book can be inferred
    from the current directory.  ``--show-opts`` prints the parsed options
    and exits with status 0; ``--legend`` calls ``legend()`` and exits with
    status 0.
    """
    import optparse

    parser = optparse.OptionParser(
        usage='usage: %prog [options]',
        version='%prog 0.1',
        description='A visualizer for '
                    'coordinate-annotated OCR data.')

    def legend_callback(option, opt_str, value, parser):
        legend()
        sys.exit(0)

    parser.add_option('--legend', '-l',
                      action='callback',
                      callback=legend_callback,
                      help='Display legend information - for generated images')
    parser.add_option('--reduce',
                      action='store',
                      type='int',
                      metavar='n',
                      default=2,
                      help='For jp2 input images, reduce jp2 resolution '
                           'by 2 ^ n when reading '
                           'original image, for speed. This also reduces the '
                           'output scale by 2 ^ n, unless otherwise specified '
                           'with --scale.')
    parser.add_option('--scale',
                      action='store',
                      type='int',
                      default=0,
                      help='Scale result images down from original scan '
                           'resolution.')
    parser.add_option('--last',
                      action='store',
                      type='int',
                      metavar='leaf',
                      default=0,
                      help='Stop generating output leaves '
                           'after the specified leaf')
    parser.add_option('--first',
                      action='store',
                      type='int',
                      metavar='leaf',
                      default=0,
                      help='Don\'t generate output leaves until the '
                           'specified leaf')
    parser.add_option('--leaf',
                      action='store',
                      type='int',
                      metavar='leaf',
                      default=0,
                      help='Only generate output for the specified leaf')
    parser.add_option('--text',
                      action='store_true',
                      default=False,
                      help='Generate output characters for OCRed '
                           'text in input files')
    parser.add_option('--outdir',
                      help='Output directory. Default is source_type + \'_viz\'')
    parser.add_option('--source',
                      choices=['abbyy', 'pdftoxml', 'djvu'],
                      default='abbyy',
                      help='Which source to use for OCR data/coordinates.')
    # Debugging aid; use help=optparse.SUPPRESS_HELP to hide it from --help.
    parser.add_option('--show-opts',
                      action='store_true',
                      help='Display parsed options/defaults and exit')

    # The parsed options are published as a module-level global.
    global opts
    opts, args = parser.parse_args(argv)

    if opts.reduce < 0 or opts.reduce > 4:
        parser.error('--reduce must be between 0 and 4')
    if opts.scale == 0:
        # No explicit scale: follow the jp2 reduction factor.
        opts.scale = 2 ** opts.reduce
    if opts.leaf != 0:
        if opts.first > 0 or opts.last > 0:
            parser.error('can\'t specify --last or --first with --leaf')
        opts.last = opts.first = opts.leaf
    if opts.source == 'djvu':
        parser.error('--source=djvu not supported at the moment')
    if opts.outdir is None:
        opts.outdir = opts.source + '_viz'

    if opts.show_opts:
        # Single-argument print(...) behaves the same under the Python 2
        # print statement.
        print('Options: ' + str(opts))
        print('Args: ' + str(args))
        sys.exit(0)

    # infer_book_id() reports "nothing found" by returning None; fail with a
    # usage error before creating the output directory.
    book_id = iarchive.infer_book_id()
    if book_id is None:
        parser.error('no book found in current directory')

    parser.destroy()

    out_path = './' + opts.outdir + '/'
    if not os.path.isdir(out_path):
        os.mkdir(out_path)

    iabook = iarchive.Book(book_id, '', '.')
    visualize(iabook)