def ocr_djvu(opt, filename, task_scheduler=None): if type(filename) == type(u''): filename = filename.encode('utf-8') print "Starting to process:", filename if not opt.out_dir.endswith('/'): opt.out_dir += '/' nr_pages = get_nr_pages_djvu(filename) if nr_pages == None: print >> sys.stderr, "unable to get_nr_pages for file:", filename return False if opt.num_thread == -1: opt.num_thread = multiprocessing.cpu_count() if not task_scheduler: opt.num_thread = max(int(opt.num_thread / 2), 1) if opt.num_thread == 1: for nr in range(1, nr_pages + 1): if not opt.silent: utils.safe_write(sys.stderr, str(nr) + '/' + str(nr_pages) + '\r') do_one_page(opt, nr, filename) else: thread_array = [] job_queue = multiprocessing.Queue(opt.num_thread) args = (job_queue, opt, filename) for i in range(opt.num_thread): if not opt.silent: print "starting thread" t = multiprocessing.Process(target=do_file, args=args) t.daemon = True t.start() if task_scheduler: task_scheduler.job_started(t) thread_array.append(t) for nr in range(1, nr_pages + 1): if not opt.silent: utils.safe_write(sys.stderr, str(nr) + '/' + str(nr_pages) + '\r') job_queue.put(nr) for i in range(opt.num_thread): job_queue.put(None) while len(thread_array): for i in range(len(thread_array) - 1, -1, -1): try: thread_array[i].join() del thread_array[i] except OSError, ose: if ose.errno != errno.EINTR: raise ose if not opt.silent: print "all thread finished"
# NOTE(review): this definition is a duplicate of the ocr_djvu() defined
# just above (only cosmetic spacing differs).  Python binds the last
# definition, so this copy shadows the previous one -- this looks like a
# merge/paste artefact and one of the two should probably be removed.
def ocr_djvu(opt, filename, task_scheduler = None):
    # OCR every page of a djvu file, sequentially (num_thread == 1) or via
    # a pool of worker processes fed page numbers through a queue.
    # Returns False when the page count can't be read.

    # The external djvu tooling expects a byte-string (utf-8) path.
    if type(filename) == type(u''):
        filename = filename.encode('utf-8')
    print "Starting to process:", filename
    # Normalise the output directory so page paths can be appended directly.
    if not opt.out_dir.endswith('/'):
        opt.out_dir += '/'
    nr_pages = get_nr_pages_djvu(filename)
    if nr_pages == None:
        print >> sys.stderr, "unable to get_nr_pages for file:", filename
        return False
    if opt.num_thread == -1:
        opt.num_thread = multiprocessing.cpu_count()
        # NOTE(review): without an external scheduler only half the cores
        # are used -- presumably to avoid hogging a shared host; confirm.
        if not task_scheduler:
            opt.num_thread = max(int(opt.num_thread/2), 1)
    if opt.num_thread == 1:
        # Sequential path: no worker processes at all.
        for nr in range(1, nr_pages + 1):
            if not opt.silent:
                utils.safe_write(sys.stderr, str(nr) + '/' + str(nr_pages) + '\r')
            do_one_page(opt, nr, filename)
    else:
        thread_array = []
        # Bounded queue: producers block once num_thread jobs are pending.
        job_queue = multiprocessing.Queue(opt.num_thread)
        args = (job_queue, opt, filename)
        for i in range(opt.num_thread):
            if not opt.silent:
                print "starting thread"
            t = multiprocessing.Process(target=do_file, args=args)
            t.daemon = True
            t.start()
            if task_scheduler:
                task_scheduler.job_started(t)
            thread_array.append(t)
        for nr in range(1, nr_pages + 1):
            if not opt.silent:
                utils.safe_write(sys.stderr, str(nr) + '/' + str(nr_pages) + '\r')
            job_queue.put(nr)
        # One sentinel per worker so every do_file() loop terminates.
        for i in range(opt.num_thread):
            job_queue.put(None)
        # join() can be interrupted by signal delivery; retry on EINTR,
        # removing each worker from the list once it has been reaped.
        while len(thread_array):
            for i in range(len(thread_array) - 1, -1, -1):
                try:
                    thread_array[i].join()
                    del thread_array[i]
                except OSError, ose:
                    if ose.errno != errno.EINTR:
                        raise ose
        if not opt.silent:
            print "all thread finished"
    # NOTE(review): this copy falls through here returning None, while the
    # fragment later in the file ends with `return True` -- this definition
    # appears truncated; verify against the original file.
job_queue.put(None) while len(thread_array): for i in range(len(thread_array) - 1, -1, -1): try: thread_array[i].join() del thread_array[i] except OSError, ose: if ose.errno != errno.EINTR: raise ose if not opt.silent: print "all thread finished" if not opt.silent: utils.safe_write(sys.stderr, "\n") return True def default_options(): class Options: pass options = Options() options.config = '' options.num_thread = 1 options.base_files = [] options.compress = None options.silent = False options.out_dir = './'