Пример #1
0
def ocr_djvu(opt, filename, task_scheduler=None):

    if type(filename) == type(u''):
        filename = filename.encode('utf-8')

    print "Starting to process:", filename

    if not opt.out_dir.endswith('/'):
        opt.out_dir += '/'

    nr_pages = get_nr_pages_djvu(filename)
    if nr_pages == None:
        print >> sys.stderr, "unable to get_nr_pages for file:", filename
        return False

    if opt.num_thread == -1:
        opt.num_thread = multiprocessing.cpu_count()
        if not task_scheduler:
            opt.num_thread = max(int(opt.num_thread / 2), 1)

    if opt.num_thread == 1:
        for nr in range(1, nr_pages + 1):
            if not opt.silent:
                utils.safe_write(sys.stderr,
                                 str(nr) + '/' + str(nr_pages) + '\r')
            do_one_page(opt, nr, filename)
    else:
        thread_array = []
        job_queue = multiprocessing.Queue(opt.num_thread)
        args = (job_queue, opt, filename)
        for i in range(opt.num_thread):
            if not opt.silent:
                print "starting thread"
            t = multiprocessing.Process(target=do_file, args=args)
            t.daemon = True
            t.start()
            if task_scheduler:
                task_scheduler.job_started(t)
            thread_array.append(t)

        for nr in range(1, nr_pages + 1):
            if not opt.silent:
                utils.safe_write(sys.stderr,
                                 str(nr) + '/' + str(nr_pages) + '\r')
            job_queue.put(nr)

        for i in range(opt.num_thread):
            job_queue.put(None)

        while len(thread_array):
            for i in range(len(thread_array) - 1, -1, -1):
                try:
                    thread_array[i].join()
                    del thread_array[i]
                except OSError, ose:
                    if ose.errno != errno.EINTR:
                        raise ose

        if not opt.silent:
            print "all thread finished"
Пример #2
0
def ocr_djvu(opt, filename, task_scheduler = None):

    if type(filename) == type(u''):
        filename = filename.encode('utf-8')

    print "Starting to process:", filename

    if not opt.out_dir.endswith('/'):
        opt.out_dir += '/'

    nr_pages = get_nr_pages_djvu(filename)
    if nr_pages == None:
        print >> sys.stderr, "unable to get_nr_pages for file:", filename
        return False

    if opt.num_thread == -1:
        opt.num_thread = multiprocessing.cpu_count()
        if not task_scheduler:
            opt.num_thread = max(int(opt.num_thread/2), 1)

    if opt.num_thread == 1:
        for nr in range(1, nr_pages + 1):
            if not opt.silent:
                utils.safe_write(sys.stderr, str(nr) + '/' + str(nr_pages) + '\r')
            do_one_page(opt, nr, filename)
    else:
        thread_array = []
        job_queue = multiprocessing.Queue(opt.num_thread)
        args = (job_queue, opt, filename)
        for i in range(opt.num_thread):
            if not opt.silent:
                print "starting thread"
            t = multiprocessing.Process(target=do_file, args=args)
            t.daemon = True
            t.start()
            if task_scheduler:
                task_scheduler.job_started(t)
            thread_array.append(t)

        for nr in range(1, nr_pages + 1):
            if not opt.silent:
                utils.safe_write(sys.stderr, str(nr) + '/' + str(nr_pages) + '\r')
            job_queue.put(nr)

        for i in range(opt.num_thread):
            job_queue.put(None)

        while len(thread_array):
            for i in range(len(thread_array) - 1, -1, -1):
                try:
                    thread_array[i].join()
                    del thread_array[i]
                except OSError, ose:
                    if ose.errno != errno.EINTR:
                        raise ose

        if not opt.silent:
            print "all thread finished"
Пример #3
0
            job_queue.put(None)

        while len(thread_array):
            for i in range(len(thread_array) - 1, -1, -1):
                try:
                    thread_array[i].join()
                    del thread_array[i]
                except OSError, ose:
                    if ose.errno != errno.EINTR:
                        raise ose

        if not opt.silent:
            print "all thread finished"

    if not opt.silent:
        utils.safe_write(sys.stderr, "\n")

    return True


def default_options():
    """Create an options object and fill in default settings.

    NOTE(review): only the assignments below are visible in this excerpt;
    the function presumably continues (e.g. returning ``options``) --
    confirm against the full source.
    """
    # Empty class used purely as an attribute container.
    class Options:
        pass

    options = Options()
    options.config = ''
    options.num_thread = 1
    options.base_files = []
    options.compress = None
    options.silent = False
    options.out_dir = './'
Пример #4
0
            job_queue.put(None)

        while len(thread_array):
            for i in range(len(thread_array) - 1, -1, -1):
                try:
                    thread_array[i].join()
                    del thread_array[i]
                except OSError, ose:
                    if ose.errno != errno.EINTR:
                        raise ose

        if not opt.silent:
            print "all thread finished"

    if not opt.silent:
        utils.safe_write(sys.stderr, "\n")

    return True

def default_options():
    class Options:
        pass

    options = Options()
    options.config = ''
    options.num_thread = 1
    options.base_files = []
    options.compress = None
    options.silent = False
    options.out_dir = './'