Exemplo n.º 1
0
def main():
    """Command-line entry point for aligning a box file with a truth file.

    Exactly two positional arguments are expected: ``truthfile`` followed by
    ``boxfile``. The box file is handed to ``parse_boxfile`` and the truth
    file to ``get_glyphs``; the two results are passed to ``align_boxes``
    together with the ``--verbose`` flag. Every box is then written to the
    ``--output`` file (default ``aligned.box``), one per line, UTF-8 encoded.
    The output file is opened in write mode, so an existing file is replaced.

    NOTE(review): the return value of ``align_boxes`` is discarded and
    ``boxes`` is written afterwards, so it presumably updates ``boxes`` in
    place -- confirm against its definition.

    Returns:
        0 after printing the help text when the positional argument count is
        not two; otherwise nothing is returned explicitly.

    Raises:
        ValueError: if the number of boxes differs from the number of glyphs.
    """
    cli = optparse.OptionParser(
        usage="Usage: %prog [-o outfile] truthfile boxfile")
    cli.add_option(
        '-o', '--output', dest='output', action='store', type='str',
        default='aligned.box',
        help='Output file. Overwrites existing files with no warning.')
    cli.add_option(
        '-v', '--verbose', dest='verbose', action='store_true',
        help="Print shifted lines as they are encountered and metrics "
             "about the number of lines affected.")
    options, positional = cli.parse_args()

    # Anything other than exactly two positional arguments just shows help.
    if len(positional) != 2:
        cli.print_help()
        return 0

    truth_path, box_path = positional
    # The box file is parsed before the truth file is read.
    boxes = parse_boxfile(box_path)
    glyphs = get_glyphs(truth_path)

    box_count, glyph_count = len(boxes), len(glyphs)
    if box_count != glyph_count:
        raise ValueError(
            "Mismatch between number of boxes (%d) and number of glyphs (%d)."
            % (box_count, glyph_count))

    align_boxes(boxes, glyphs, options.verbose)

    with codecs.open(options.output, mode='wb', encoding='utf-8') as sink:
        for entry in boxes:
            sink.write(unicode(entry) + u'\n')
Exemplo n.º 2
0
def main():
    """Command-line entry point for merging nearby boxes in a box file.

    Exactly one positional argument, ``boxfile``, is expected. It is handed
    to ``parse_boxfile`` and the result is passed, together with the parsed
    options, to ``merge_nearby_boxes``, which returns the merged boxes and a
    stats mapping with the keys ``num_merged``, ``total_in`` and
    ``total_out``.

    With ``--dry`` only a summary line is printed and no file is written.
    Otherwise every merged box is written to the ``--output`` file (default
    ``merged.box``), one per line, UTF-8 encoded. The output file is opened
    in write mode, so an existing file is replaced.

    Returns:
        0 after printing the help text when the positional argument count is
        not one; otherwise nothing is returned explicitly.
    """
    parser = optparse.OptionParser(usage="Usage: %prog [-t threshold] boxfile")
    # The help text previously claimed that *horizontal* separation is
    # ignored, contradicting both the first sentence and the note about
    # boxes on different lines; the ignored axis is the vertical one.
    parser.add_option(
        '-t', '--threshold', dest='threshold', action='store',
        type='int', default=1,
        help='Adjacent boxes separated horizontally by THRESHOLD or fewer '
             'pixels will be merged. Vertical separation is ignored. Note '
             'that this means that boxes located on different lines might '
             'be merged in certain (rare) circumstances. Defaults to 1 '
             '(boxes are adjacent).')
    parser.add_option(
        '-o', '--output', dest='output', action='store',
        type='str', default='merged.box',
        help='Output file. Will overwrite existing files.')
    parser.add_option(
        '-d', '--dry', dest='dry', action='store_true',
        help='Perform a dry run. No files will be written, info about '
             'number of merged boxes will be output to the command line.')
    (opts, args) = parser.parse_args()

    # Anything other than exactly one positional argument just shows help.
    if len(args) != 1:
        parser.print_help()
        return 0

    boxes = parse_boxfile(args[0])
    (merged, stats) = merge_nearby_boxes(opts, boxes)

    if opts.dry:
        # A single parenthesised argument prints identically under the
        # Python 2 print statement and the Python 3 print function.
        print("Merged %d out of %d boxes. Outputting %d boxes."
              % (stats["num_merged"], stats["total_in"], stats["total_out"]))
    else:
        with codecs.open(opts.output, mode='wb', encoding='utf-8') as outfile:
            for box in merged:
                outfile.write(unicode(box) + u'\n')
Exemplo n.º 3
0
def main():
    """Command-line entry point for aligning a box file with a truth file.

    Exactly two positional arguments are expected: ``truthfile`` followed by
    ``boxfile``. The box file is handed to ``parse_boxfile`` and the truth
    file to ``get_glyphs``; the two results are passed to ``align_boxes``
    together with the ``--verbose`` flag. Every box is then written to the
    ``--output`` file (default ``aligned.box``), one per line, UTF-8 encoded.
    The output file is opened in write mode, so an existing file is replaced.

    NOTE(review): the return value of ``align_boxes`` is discarded and
    ``boxes`` is written afterwards, so it presumably updates ``boxes`` in
    place -- confirm against its definition.

    Returns:
        0 after printing the help text when the positional argument count is
        not two; otherwise nothing is returned explicitly.

    Raises:
        ValueError: if the number of boxes differs from the number of glyphs.
    """
    parser = optparse.OptionParser(
        usage="Usage: %prog [-o outfile] truthfile boxfile")

    # (flags, keyword settings) for each option, in registration order.
    option_table = [
        (('-o', '--output'),
         {'dest': 'output',
          'action': 'store',
          'type': 'str',
          'default': 'aligned.box',
          'help': 'Output file. Overwrites existing files with no warning.'}),
        (('-v', '--verbose'),
         {'dest': 'verbose',
          'action': 'store_true',
          'help': "Print shifted lines as they are encountered and metrics "
                  "about the number of lines affected."}),
    ]
    for flags, settings in option_table:
        parser.add_option(*flags, **settings)

    parsed = parser.parse_args()
    opts, args = parsed[0], parsed[1]

    # Anything other than exactly two positional arguments just shows help.
    if len(args) != 2:
        parser.print_help()
        return 0

    # The box file (second argument) is parsed before the truth file.
    boxes = parse_boxfile(args[1])
    glyphs = get_glyphs(args[0])

    if len(boxes) == len(glyphs):
        align_boxes(boxes, glyphs, opts.verbose)
    else:
        raise ValueError(
            "Mismatch between number of boxes (%d) and number of glyphs (%d)."
            % (len(boxes), len(glyphs)))

    with codecs.open(opts.output, mode='wb', encoding='utf-8') as outfile:
        for line in (unicode(box) + u'\n' for box in boxes):
            outfile.write(line)