def run(self):
    job = ParallelJob('move_library',
            'Move library from %s to %s'%(self.from_, self.to),
            lambda x,y:x, args=[self.from_, self.to])
    server = Server(pool_size=1)
    server.add_job(job)

    while not job.is_finished:
        time.sleep(0.2)
        job.update(consume_notifications=False)
        while True:
            try:
                title = job.notifications.get_nowait()[0]
                self.count += 1
                self.result_queue.put((float(self.count)/self.total, title))
            except Empty:
                break

    job.update()
    server.close()
    if not job.result:
        self.failed = True
        self.details = job.details

    if os.path.exists(job.log_path):
        os.remove(job.log_path)
def set_metadata(stream, mi):
    if not podofo:
        raise Unavailable(podofo_err)
    with TemporaryFile('_podofo_read.pdf') as inputf, \
            TemporaryFile('_podofo_write.pdf') as outputf:
        server = Server(pool_size=1)
        with open(inputf, 'wb') as f:
            shutil.copyfileobj(stream, f)
        job = ParallelJob('write_pdf_metadata', 'Write pdf metadata',
            lambda x,y:x, args=[inputf, outputf, mi.title, mi.authors,
                mi.book_producer, mi.tags])
        server.add_job(job)
        while not job.is_finished:
            time.sleep(0.1)
            job.update()
        job.update()
        server.close()
        if job.failed:
            prints(job.details)
        elif job.result:
            with open(outputf, 'rb') as f:
                f.seek(0, 2)
                if f.tell() > 100:
                    f.seek(0)
                    stream.seek(0)
                    stream.truncate()
                    shutil.copyfileobj(f, stream)
                    stream.flush()
    stream.seek(0)
def get_metadata(stream, cpath=None):
    if not podofo:
        raise Unavailable(podofo_err)
    pt = PersistentTemporaryFile('_podofo.pdf')
    pt.write(stream.read())
    pt.close()
    server = Server(pool_size=1)
    job = ParallelJob('read_pdf_metadata', 'Read pdf metadata',
        lambda x,y:x, args=[pt.name, cpath])
    server.add_job(job)
    while not job.is_finished:
        time.sleep(0.1)
        job.update()
    job.update()
    server.close()
    if job.result is None:
        raise ValueError('Failed to read metadata: ' + job.details)
    title, authors, creator, tags, ok = job.result
    if not ok:
        print('Failed to extract cover:')
        print(job.details)
    if title == '_':
        title = getattr(stream, 'name', _('Unknown'))
        title = os.path.splitext(title)[0]
    mi = MetaInformation(title, authors)
    if creator:
        mi.book_producer = creator
    if tags:
        mi.tags = tags
    if os.path.exists(pt.name):
        os.remove(pt.name)
    if ok:
        mi.cover = cpath
    return mi
def do_store_locations(books_to_scan, options, notification=lambda x,y:x):
    '''
    Master job, to launch child jobs to modify each ePub
    '''
    debug_print("start")
    server = Server()

    debug_print("options=%s" % (options))
    # Queue all the jobs
#    args = ['calibre_plugins.sonyutilities.jobs', 'do_sonyutilities_all',
    args = ['calibre_plugins.sonyutilities.jobs', 'do_store_bookmarks',
            (books_to_scan, options)]
#    debug_print("args=%s" % (args))
    debug_print("len(books_to_scan)=%d" % (len(books_to_scan)))
    job = ParallelJob('arbitrary', "Store locations", done=None, args=args)
    server.add_job(job)

    # This server is an arbitrary_n job, so there is a notifier available.
    # Set the % complete to a small number to avoid the 'unavailable' indicator
    notification(0.01, 'Reading device database')

    # dequeue the job results as they arrive, saving the results
    total = 1
    count = 0
    stored_locations = dict()
    while True:
        job = server.changed_jobs_queue.get()
        # A job can 'change' when it is not finished, for example if it
        # produces a notification. Ignore these.
        job.update()
        if not job.is_finished:
            debug_print("Job not finished")
            continue
#        debug_print("Job finished")
        # A job really finished. Get the information.
        stored_locations = job.result
#        import pydevd; pydevd.settrace()  # leftover debug breakpoint, disabled
#        book_id = job._book_id
#        stored_locations[book_id] = stored_location
        count += 1
        notification(float(count)/total, 'Storing locations')
        # Add this job's output to the current log
#        debug_print("Stored_location=", stored_locations)
        number_bookmarks = len(stored_locations) if stored_locations else 0
        debug_print("Stored_location count=%d" % number_bookmarks)
        debug_print(job.details)
        if count >= total:
            # All done!
            break

    server.close()
    debug_print("finished")
    # return the map as the job result
    return stored_locations, options
def run(self):
    jobs, ids = set([]), set([])
    for t in self.tasks:
        for b in t:
            ids.add(b[0])
    progress = Progress(self.result_queue, self.tdir)
    server = Server() if self.spare_server is None else self.spare_server
    try:
        for i, task in enumerate(self.tasks):
            job = ParallelJob('read_metadata',
                'Read metadata (%d of %d)'%(i, len(self.tasks)),
                lambda x,y:x, args=[task, self.tdir])
            jobs.add(job)
            server.add_job(job)

        while not self.canceled:
            time.sleep(0.2)
            running = False
            for job in jobs:
                while True:
                    try:
                        id = job.notifications.get_nowait()[-1]
                        if id in ids:
                            progress(id)
                            ids.remove(id)
                    except Empty:
                        break
                job.update(consume_notifications=False)
                if not job.is_finished:
                    running = True
            if not running:
                break
    finally:
        server.close()
    time.sleep(1)

    if self.canceled:
        return

    for id in ids:
        progress(id)

    for job in jobs:
        if job.failed:
            prints(job.details)
        if os.path.exists(job.log_path):
            try:
                os.remove(job.log_path)
            except:
                pass
def __init__(self):
    QAbstractTableModel.__init__(self)
    SearchQueryParser.__init__(self, ['all'])
    self.wait_icon     = (QIcon(I('jobs.png')))
    self.running_icon  = (QIcon(I('exec.png')))
    self.error_icon    = (QIcon(I('dialog_error.png')))
    self.done_icon     = (QIcon(I('ok.png')))
    self.jobs          = []
    self.add_job       = Dispatcher(self._add_job)
    self.server = Server(limit=int(config['worker_limit'] / 2.0),
                         enforce_cpu_limit=config['enforce_cpu_limit'])
    self.threaded_server = ThreadedJobServer()
    self.changed_queue = Queue()

    self.timer = QTimer(self)
    self.timer.timeout.connect(self.update, type=Qt.QueuedConnection)
    self.timer.start(1000)
def process_pages(pages, opts, update, tdir):
    '''
    Render all identified comic pages.
    '''
    progress = Progress(len(pages), update)
    server = Server()
    jobs = []
    tasks = [(p, os.path.join(tdir, os.path.basename(p))) for p in pages]
    tasks = server.split(pages)
    for task in tasks:
        jobs.append(ParallelJob('render_pages', '', progress,
                                args=[task, tdir, opts]))
        server.add_job(jobs[-1])
    while True:
        time.sleep(1)
        running = False
        for job in jobs:
            while True:
                try:
                    x = job.notifications.get_nowait()
                    progress(*x)
                except Empty:
                    break
            job.update()
            if not job.is_finished:
                running = True
        if not running:
            break
    server.close()
    ans, failures = [], []

    for job in jobs:
        if job.failed or job.result is None:
            raise Exception(_('Failed to process comic: \n\n%s')%
                    job.log_file.read())
        pages, failures_ = job.result
        ans += pages
        failures += failures_
    return ans, failures
def do_download_worker(book_list, options, cpus, merge=False,
                       notification=lambda x,y:x):
    '''
    Master job, to launch child jobs to download a set of stories.
    This is run as a worker job in the background to keep the UI more
    responsive and get around the memory leak issues as it will launch
    a child job for each book as a worker process
    '''
    server = Server(pool_size=cpus)

    logger.info(options['version'])
    total = 0
    alreadybad = []

    # Queue all the jobs
    logger.info("Adding jobs for URLs:")
    for book in book_list:
        logger.info("%s"%book['url'])
        if book['good']:
            total += 1
            args = ['calibre_plugins.fanficfare_plugin.jobs',
                    'do_download_for_worker',
                    (book,options,merge)]
            job = ParallelJob('arbitrary_n',
                              "url:(%s) id:(%s)"%(book['url'],book['calibre_id']),
                              done=None,
                              args=args)
            job._book = book
            server.add_job(job)
        else:
            # was already bad before the subprocess ever started.
            alreadybad.append(book)

    # This server is an arbitrary_n job, so there is a notifier available.
    # Set the % complete to a small number to avoid the 'unavailable' indicator
    notification(0.01, _('Downloading FanFiction Stories'))

    # dequeue the job results as they arrive, saving the results
    count = 0
    while True:
        job = server.changed_jobs_queue.get()
        # A job can 'change' when it is not finished, for example if it
        # produces a notification. Ignore these.
        job.update()
        if not job.is_finished:
            continue
        # A job really finished. Get the information.
        book_list.remove(job._book)
        book_list.append(job.result)
        book_id = job._book['calibre_id']
        count = count + 1
        notification(float(count)/total,
                     '%d of %d stories finished downloading'%(count,total))
        # Add this job's output to the current log
        logger.info('Logfile for book ID %s (%s)'%(book_id, job._book['title']))
        logger.info(job.details)

        if count >= total:
            ## ordering first by good vs bad, then by listorder.
            good_list = filter(lambda x : x['good'], book_list)
            bad_list = filter(lambda x : not x['good'], book_list)
            good_list = sorted(good_list, key=lambda x : x['listorder'])
            bad_list = sorted(bad_list, key=lambda x : x['listorder'])
            logger.info("\n"+_("Download Results:")+"\n%s\n"%("\n".join([
                "%(url)s %(comment)s" % book for book in good_list+bad_list])))
            logger.info("\n"+_("Successful:")+"\n%s\n"%("\n".join(
                [book['url'] for book in good_list])))
            logger.info("\n"+_("Unsuccessful:")+"\n%s\n"%("\n".join(
                [book['url'] for book in bad_list])))
            break

    server.close()

    # return the book list as the job result
    return book_list
class JobManager(QAbstractTableModel, AdaptSQP):  # {{{

    job_added = pyqtSignal(int)
    job_done = pyqtSignal(int)

    def __init__(self):
        QAbstractTableModel.__init__(self)
        SearchQueryParser.__init__(self, ['all'])
        self.wait_icon     = (QIcon(I('jobs.png')))
        self.running_icon  = (QIcon(I('exec.png')))
        self.error_icon    = (QIcon(I('dialog_error.png')))
        self.done_icon     = (QIcon(I('ok.png')))
        self.jobs          = []
        self.add_job       = Dispatcher(self._add_job)
        self.server = Server(limit=config['worker_limit'] // 2,
                             enforce_cpu_limit=config['enforce_cpu_limit'])
        self.threaded_server = ThreadedJobServer()
        self.changed_queue = Queue()

        self.timer = QTimer(self)
        self.timer.timeout.connect(self.update,
                type=Qt.ConnectionType.QueuedConnection)
        self.timer.start(1000)

    def columnCount(self, parent=QModelIndex()):
        return 5

    def rowCount(self, parent=QModelIndex()):
        return len(self.jobs)

    def headerData(self, section, orientation, role):
        if role != Qt.ItemDataRole.DisplayRole:
            return None
        if orientation == Qt.Orientation.Horizontal:
            return ({
              0: _('Job'),
              1: _('Status'),
              2: _('Progress'),
              3: _('Running time'),
              4: _('Start time'),
            }.get(section, ''))
        else:
            return (section + 1)

    def show_tooltip(self, arg):
        widget, pos = arg
        QToolTip.showText(pos, self.get_tooltip())

    def get_tooltip(self):
        running_jobs = [j for j in self.jobs if j.run_state == j.RUNNING]
        waiting_jobs = [j for j in self.jobs if j.run_state == j.WAITING]
        lines = [ngettext('There is a running job:', 'There are {} running jobs:',
                          len(running_jobs)).format(len(running_jobs))]
        for job in running_jobs:
            desc = job.description
            if not desc:
                desc = _('Unknown job')
            p = 100. if job.is_finished else job.percent
            lines.append('%s: %.0f%% done' % (desc, p))
        l = ngettext('There is a waiting job', 'There are {} waiting jobs',
                     len(waiting_jobs)).format(len(waiting_jobs))
        lines.extend(['', l])
        for job in waiting_jobs:
            desc = job.description
            if not desc:
                desc = _('Unknown job')
            lines.append(desc)
        return '\n'.join(['calibre', ''] + lines)

    def data(self, index, role):
        try:
            if role not in (Qt.ItemDataRole.DisplayRole, Qt.ItemDataRole.DecorationRole):
                return None
            row, col = index.row(), index.column()
            job = self.jobs[row]

            if role == Qt.ItemDataRole.DisplayRole:
                if col == 0:
                    desc = job.description
                    if not desc:
                        desc = _('Unknown job')
                    return (desc)
                if col == 1:
                    return (job.status_text)
                if col == 2:
                    p = 100. if job.is_finished else job.percent
                    return (p)
                if col == 3:
                    rtime = job.running_time
                    if rtime is None:
                        return None
                    return human_readable_interval(rtime)
                if col == 4 and job.start_time is not None:
                    return (strftime('%H:%M -- %d %b',
                        time.localtime(job.start_time)))
            if role == Qt.ItemDataRole.DecorationRole and col == 0:
                state = job.run_state
                if state == job.WAITING:
                    return self.wait_icon
                if state == job.RUNNING:
                    return self.running_icon
                if job.killed or job.failed:
                    return self.error_icon
                return self.done_icon
        except:
            import traceback
            traceback.print_exc()
        return None

    def update(self):
        try:
            self._update()
        except BaseException:
            import traceback
            traceback.print_exc()

    def _update(self):
        # Update running time
        for i, j in enumerate(self.jobs):
            if j.run_state == j.RUNNING:
                idx = self.index(i, 3)
                self.dataChanged.emit(idx, idx)

        # Update parallel jobs
        jobs = set()
        while True:
            try:
                jobs.add(self.server.changed_jobs_queue.get_nowait())
            except Empty:
                break

        # Update device jobs
        while True:
            try:
                jobs.add(self.changed_queue.get_nowait())
            except Empty:
                break

        # Update threaded jobs
        while True:
            try:
                jobs.add(self.threaded_server.changed_jobs.get_nowait())
            except Empty:
                break

        if jobs:
            needs_reset = False
            for job in jobs:
                orig_state = job.run_state
                job.update()
                if orig_state != job.run_state:
                    needs_reset = True
                    if job.is_finished:
                        self.job_done.emit(len(self.unfinished_jobs()))
            if needs_reset:
                self.modelAboutToBeReset.emit()
                self.jobs.sort()
                self.modelReset.emit()
            else:
                for job in jobs:
                    idx = self.jobs.index(job)
                    self.dataChanged.emit(self.index(idx, 0), self.index(idx, 3))

        # Kill parallel jobs that have gone on too long
        try:
            wmax_time = gprefs['worker_max_time'] * 60
        except:
            wmax_time = 0
        if wmax_time > 0:
            for job in self.jobs:
                if isinstance(job, ParallelJob):
                    rtime = job.running_time
                    if (rtime is not None and rtime > wmax_time and
                            job.duration is None):
                        job.timed_out = True
                        self.server.kill_job(job)

    def _add_job(self, job):
        self.modelAboutToBeReset.emit()
        self.jobs.append(job)
        self.jobs.sort()
        self.job_added.emit(len(self.unfinished_jobs()))
        self.modelReset.emit()

    def done_jobs(self):
        return [j for j in self.jobs if j.is_finished]

    def unfinished_jobs(self):
        return [j for j in self.jobs if not j.is_finished]

    def row_to_job(self, row):
        return self.jobs[row]

    def rows_to_jobs(self, rows):
        return [self.jobs[row] for row in rows]

    def has_device_jobs(self, queued_also=False):
        for job in self.jobs:
            if isinstance(job, DeviceJob):
                if job.duration is None:  # Running or waiting
                    if (job.is_running or queued_also):
                        return True
        return False

    def has_jobs(self):
        for job in self.jobs:
            if job.is_running:
                return True
        return False

    def run_job(self, done, name, args=[], kwargs={},
                           description='', core_usage=1):
        job = ParallelJob(name, description, done, args=args, kwargs=kwargs)
        job.core_usage = core_usage
        self.add_job(job)
        self.server.add_job(job)
        return job

    def run_threaded_job(self, job):
        self.add_job(job)
        self.threaded_server.add_job(job)

    def launch_gui_app(self, name, args=(), kwargs=None, description=''):
        job = ParallelJob(name, description, lambda x: x,
                args=list(args), kwargs=kwargs or {})
        self.server.run_job(job, gui=True, redirect_output=False)

    def _kill_job(self, job):
        if isinstance(job, ParallelJob):
            self.server.kill_job(job)
        elif isinstance(job, ThreadedJob):
            self.threaded_server.kill_job(job)
        else:
            job.kill_on_start = True

    def hide_jobs(self, rows):
        for r in rows:
            self.jobs[r].hidden_in_gui = True
        for r in rows:
            self.dataChanged.emit(self.index(r, 0), self.index(r, 0))

    def show_hidden_jobs(self):
        for j in self.jobs:
            j.hidden_in_gui = False
        for r in range(len(self.jobs)):
            self.dataChanged.emit(self.index(r, 0), self.index(r, 0))

    def kill_job(self, job, view):
        if isinstance(job, DeviceJob):
            return error_dialog(view, _('Cannot kill job'),
                    _('Cannot kill jobs that communicate with the device')).exec_()
        if job.duration is not None:
            return error_dialog(view, _('Cannot kill job'),
                    _('Job has already run')).exec_()
        if not getattr(job, 'killable', True):
            return error_dialog(view, _('Cannot kill job'),
                    _('This job cannot be stopped'), show=True)
        self._kill_job(job)

    def kill_multiple_jobs(self, jobs, view):
        devjobs = [j for j in jobs if isinstance(j, DeviceJob)]
        if devjobs:
            error_dialog(view, _('Cannot kill job'),
                    _('Cannot kill jobs that communicate with the device')).exec_()
            jobs = [j for j in jobs if not isinstance(j, DeviceJob)]
        jobs = [j for j in jobs if j.duration is None]
        unkillable = [j for j in jobs if not getattr(j, 'killable', True)]
        if unkillable:
            names = '\n'.join(as_unicode(j.description) for j in unkillable)
            error_dialog(view, _('Cannot kill job'),
                    _('Some of the jobs cannot be stopped. Click "Show details"'
                        ' to see the list of unstoppable jobs.'), det_msg=names,
                    show=True)
            jobs = [j for j in jobs if getattr(j, 'killable', True)]
        jobs = [j for j in jobs if j.duration is None]
        for j in jobs:
            self._kill_job(j)

    def kill_all_jobs(self):
        for job in self.jobs:
            if (isinstance(job, DeviceJob) or job.duration is not None or
                    not getattr(job, 'killable', True)):
                continue
            self._kill_job(job)

    def terminate_all_jobs(self):
        self.server.killall()
        for job in self.jobs:
            if (isinstance(job, DeviceJob) or job.duration is not None or
                    not getattr(job, 'killable', True)):
                continue
            if not isinstance(job, ParallelJob):
                self._kill_job(job)

    def universal_set(self):
        return {i for i, j in enumerate(self.jobs)
                if not getattr(j, 'hidden_in_gui', False)}

    def get_matches(self, location, query, candidates=None):
        if candidates is None:
            candidates = self.universal_set()
        ans = set()
        if not query:
            return ans
        query = lower(query)
        for j in candidates:
            job = self.jobs[j]
            if job.description and query in lower(job.description):
                ans.add(j)
        return ans

    def find(self, query):
        query = query.strip()
        rows = self.parse(query)
        return rows
class JobManager(QAbstractTableModel, SearchQueryParser):  # {{{

    job_added = pyqtSignal(int)
    job_done = pyqtSignal(int)

    def __init__(self):
        QAbstractTableModel.__init__(self)
        SearchQueryParser.__init__(self, ['all'])
        self.wait_icon     = QVariant(QIcon(I('jobs.png')))
        self.running_icon  = QVariant(QIcon(I('exec.png')))
        self.error_icon    = QVariant(QIcon(I('dialog_error.png')))
        self.done_icon     = QVariant(QIcon(I('ok.png')))
        self.jobs          = []
        self.add_job       = Dispatcher(self._add_job)
        self.server = Server(limit=int(config['worker_limit']/2.0),
                             enforce_cpu_limit=config['enforce_cpu_limit'])
        self.threaded_server = ThreadedJobServer()
        self.changed_queue = Queue()

        self.timer = QTimer(self)
        self.timer.timeout.connect(self.update, type=Qt.QueuedConnection)
        self.timer.start(1000)

    def columnCount(self, parent=QModelIndex()):
        return 5

    def rowCount(self, parent=QModelIndex()):
        return len(self.jobs)

    def headerData(self, section, orientation, role):
        if role != Qt.DisplayRole:
            return NONE
        if orientation == Qt.Horizontal:
            return QVariant({
              0: _('Job'),
              1: _('Status'),
              2: _('Progress'),
              3: _('Running time'),
              4: _('Start time'),
            }.get(section, ''))
        else:
            return QVariant(section+1)

    def show_tooltip(self, arg):
        widget, pos = arg
        QToolTip.showText(pos, self.get_tooltip())

    def get_tooltip(self):
        running_jobs = [j for j in self.jobs if j.run_state == j.RUNNING]
        waiting_jobs = [j for j in self.jobs if j.run_state == j.WAITING]
        lines = [_('There are %d running jobs:')%len(running_jobs)]
        for job in running_jobs:
            desc = job.description
            if not desc:
                desc = _('Unknown job')
            p = 100. if job.is_finished else job.percent
            lines.append('%s: %.0f%% done'%(desc, p))
        lines.extend(['', _('There are %d waiting jobs:')%len(waiting_jobs)])
        for job in waiting_jobs:
            desc = job.description
            if not desc:
                desc = _('Unknown job')
            lines.append(desc)
        return '\n'.join(['calibre', '']+ lines)

    def data(self, index, role):
        try:
            if role not in (Qt.DisplayRole, Qt.DecorationRole):
                return NONE
            row, col = index.row(), index.column()
            job = self.jobs[row]

            if role == Qt.DisplayRole:
                if col == 0:
                    desc = job.description
                    if not desc:
                        desc = _('Unknown job')
                    return QVariant(desc)
                if col == 1:
                    return QVariant(job.status_text)
                if col == 2:
                    p = 100. if job.is_finished else job.percent
                    return QVariant(p)
                if col == 3:
                    rtime = job.running_time
                    if rtime is None:
                        return NONE
                    return QVariant('%dm %ds'%(int(rtime)//60, int(rtime)%60))
                if col == 4 and job.start_time is not None:
                    return QVariant(time.strftime('%H:%M -- %d %b',
                        time.localtime(job.start_time)))
            if role == Qt.DecorationRole and col == 0:
                state = job.run_state
                if state == job.WAITING:
                    return self.wait_icon
                if state == job.RUNNING:
                    return self.running_icon
                if job.killed or job.failed:
                    return self.error_icon
                return self.done_icon
        except:
            import traceback
            traceback.print_exc()
        return NONE

    def update(self):
        try:
            self._update()
        except BaseException:
            import traceback
            traceback.print_exc()

    def _update(self):
        # Update running time
        for i, j in enumerate(self.jobs):
            if j.run_state == j.RUNNING:
                idx = self.index(i, 3)
                self.dataChanged.emit(idx, idx)

        # Update parallel jobs
        jobs = set([])
        while True:
            try:
                jobs.add(self.server.changed_jobs_queue.get_nowait())
            except Empty:
                break

        # Update device jobs
        while True:
            try:
                jobs.add(self.changed_queue.get_nowait())
            except Empty:
                break

        # Update threaded jobs
        while True:
            try:
                jobs.add(self.threaded_server.changed_jobs.get_nowait())
            except Empty:
                break

        if jobs:
            needs_reset = False
            for job in jobs:
                orig_state = job.run_state
                job.update()
                if orig_state != job.run_state:
                    needs_reset = True
                    if job.is_finished:
                        self.job_done.emit(len(self.unfinished_jobs()))
            if needs_reset:
                self.layoutAboutToBeChanged.emit()
                self.jobs.sort()
                self.layoutChanged.emit()
            else:
                for job in jobs:
                    idx = self.jobs.index(job)
                    self.dataChanged.emit(
                        self.index(idx, 0), self.index(idx, 3))

        # Kill parallel jobs that have gone on too long
        try:
            wmax_time = gprefs['worker_max_time'] * 60
        except:
            wmax_time = 0
        if wmax_time > 0:
            for job in self.jobs:
                if isinstance(job, ParallelJob):
                    rtime = job.running_time
                    if (rtime is not None and rtime > wmax_time and
                            job.duration is None):
                        job.timed_out = True
                        self.server.kill_job(job)

    def _add_job(self, job):
        self.layoutAboutToBeChanged.emit()
        self.jobs.append(job)
        self.jobs.sort()
        self.job_added.emit(len(self.unfinished_jobs()))
        self.layoutChanged.emit()

    def done_jobs(self):
        return [j for j in self.jobs if j.is_finished]

    def unfinished_jobs(self):
        return [j for j in self.jobs if not j.is_finished]

    def row_to_job(self, row):
        return self.jobs[row]

    def has_device_jobs(self, queued_also=False):
        for job in self.jobs:
            if isinstance(job, DeviceJob):
                if job.duration is None:  # Running or waiting
                    if (job.is_running or queued_also):
                        return True
        return False

    def has_jobs(self):
        for job in self.jobs:
            if job.is_running:
                return True
        return False

    def run_job(self, done, name, args=[], kwargs={},
                           description='', core_usage=1):
        job = ParallelJob(name, description, done, args=args, kwargs=kwargs)
        job.core_usage = core_usage
        self.add_job(job)
        self.server.add_job(job)
        return job

    def run_threaded_job(self, job):
        self.add_job(job)
        self.threaded_server.add_job(job)

    def launch_gui_app(self, name, args=[], kwargs={}, description=''):
        job = ParallelJob(name, description, lambda x: x,
                args=args, kwargs=kwargs)
        self.server.run_job(job, gui=True, redirect_output=False)

    def _kill_job(self, job):
        if isinstance(job, ParallelJob):
            self.server.kill_job(job)
        elif isinstance(job, ThreadedJob):
            self.threaded_server.kill_job(job)
        else:
            job.kill_on_start = True

    def hide_jobs(self, rows):
        for r in rows:
            self.jobs[r].hidden_in_gui = True
        for r in rows:
            self.dataChanged.emit(self.index(r, 0), self.index(r, 0))

    def show_hidden_jobs(self):
        for j in self.jobs:
            j.hidden_in_gui = False
        for r in xrange(len(self.jobs)):
            self.dataChanged.emit(self.index(r, 0), self.index(r, 0))

    def kill_job(self, row, view):
        job = self.jobs[row]
        if isinstance(job, DeviceJob):
            return error_dialog(view, _('Cannot kill job'),
                    _('Cannot kill jobs that communicate with the device')).exec_()
        if job.duration is not None:
            return error_dialog(view, _('Cannot kill job'),
                    _('Job has already run')).exec_()
        if not getattr(job, 'killable', True):
            return error_dialog(view, _('Cannot kill job'),
                    _('This job cannot be stopped'), show=True)
        self._kill_job(job)

    def kill_multiple_jobs(self, rows, view):
        jobs = [self.jobs[row] for row in rows]
        devjobs = [j for j in jobs if isinstance(j, DeviceJob)]
        if devjobs:
            error_dialog(view, _('Cannot kill job'),
                    _('Cannot kill jobs that communicate with the device')).exec_()
            jobs = [j for j in jobs if not isinstance(j, DeviceJob)]
        jobs = [j for j in jobs if j.duration is None]
        unkillable = [j for j in jobs if not getattr(j, 'killable', True)]
        if unkillable:
            names = u'\n'.join(as_unicode(j.description) for j in unkillable)
            error_dialog(view, _('Cannot kill job'),
                    _('Some of the jobs cannot be stopped. Click Show details'
                        ' to see the list of unstoppable jobs.'), det_msg=names,
                    show=True)
            jobs = [j for j in jobs if getattr(j, 'killable', True)]
        jobs = [j for j in jobs if j.duration is None]
        for j in jobs:
            self._kill_job(j)

    def kill_all_jobs(self):
        for job in self.jobs:
            if (isinstance(job, DeviceJob) or job.duration is not None or
                    not getattr(job, 'killable', True)):
                continue
            self._kill_job(job)

    def terminate_all_jobs(self):
        self.server.killall()
        for job in self.jobs:
            if (isinstance(job, DeviceJob) or job.duration is not None or
                    not getattr(job, 'killable', True)):
                continue
            if not isinstance(job, ParallelJob):
                self._kill_job(job)

    def universal_set(self):
        return set([i for i, j in enumerate(self.jobs) if not getattr(j,
            'hidden_in_gui', False)])

    def get_matches(self, location, query, candidates=None):
        if candidates is None:
            candidates = self.universal_set()
        ans = set()
        if not query:
            return ans
        query = lower(query)
        for j in candidates:
            job = self.jobs[j]
            if job.description and query in lower(job.description):
                ans.add(j)
        return ans

    def find(self, query):
        query = query.strip()
        rows = self.parse(query)
        return rows
def add_spare_server(self, *args):
    self.spare_servers.append(Server(limit=int(config['worker_limit']/2.0)))
def _run(self, tdir):
    from calibre.library.save_to_disk import config
    server = Server() if self.spare_server is None else self.spare_server
    ids = set(self.ids)
    tasks = server.split(list(ids))
    jobs = set([])
    c = config()
    recs = {}
    for pref in c.preferences:
        recs[pref.name] = getattr(self.opts, pref.name)

    plugboards = self.db.prefs.get('plugboards', {})
    template_functions = self.db.prefs.get('user_template_functions', [])

    for i, task in enumerate(tasks):
        tids = [x[-1] for x in task]
        data = self.collect_data(tids, tdir)
        dpath = os.path.join(tdir, '%d.json'%i)
        with open(dpath, 'wb') as f:
            f.write(json.dumps(data, ensure_ascii=False).encode('utf-8'))

        job = ParallelJob('save_book',
                'Save books (%d of %d)'%(i, len(tasks)),
                lambda x,y:x,
                args=[tids, dpath, plugboards, template_functions,
                      self.path, recs])
        jobs.add(job)
        server.add_job(job)

    while not self.canceled:
        time.sleep(0.2)
        running = False
        for job in jobs:
            self.get_notifications(job, ids)
            if not job.is_finished:
                running = True
        if not running:
            break

    for job in jobs:
        if not job.result:
            continue
        for id_, title, ok, tb in job.result:
            if id_ in ids:
                self.result_queue.put((id_, title, ok, tb))
                ids.remove(id_)

    server.close()
    time.sleep(1)

    if self.canceled:
        return

    for job in jobs:
        if job.failed:
            prints(job.details)
            self.error = job.details
        if os.path.exists(job.log_path):
            try:
                os.remove(job.log_path)
            except:
                pass
def do_download_worker(book_list, options, cpus, notification=lambda x, y: x):
    '''
    Master job, to launch child jobs to download a set of stories.
    This is run as a worker job in the background to keep the UI more
    responsive and get around the memory leak issues as it will launch
    a child job for each book as a worker process
    '''
    server = Server(pool_size=cpus)

    logger.info(options['version'])
    total = 0
    alreadybad = []

    # Queue all the jobs
    logger.info("Adding jobs for URLs:")
    for book in book_list:
        logger.info("%s" % book['url'])
        if book['good']:
            total += 1
            args = ['calibre_plugins.fanficfare_plugin.jobs',
                    'do_download_for_worker',
                    (book, options)]
            job = ParallelJob('arbitrary_n',
                              "url:(%s) id:(%s)" % (book['url'], book['calibre_id']),
                              done=None,
                              args=args)
            job._book = book
            server.add_job(job)
        else:
            # was already bad before the subprocess ever started.
            alreadybad.append(book)

    # This server is an arbitrary_n job, so there is a notifier available.
    # Set the % complete to a small number to avoid the 'unavailable' indicator
    notification(0.01, _('Downloading FanFiction Stories'))

    # dequeue the job results as they arrive, saving the results
    count = 0
    while True:
        job = server.changed_jobs_queue.get()
        # A job can 'change' when it is not finished, for example if it
        # produces a notification. Ignore these.
        job.update()
        if not job.is_finished:
            continue
        # A job really finished. Get the information.
        book_list.remove(job._book)
        book_list.append(job.result)
        book_id = job._book['calibre_id']
        count = count + 1
        notification(float(count) / total,
                     '%d of %d stories finished downloading' % (count, total))
        # Add this job's output to the current log
        logger.info('Logfile for book ID %s (%s)' % (book_id, job._book['title']))
        logger.info(job.details)

        if count >= total:
            logger.info("\n" + _("Successful:") + "\n%s\n" % ("\n".join(
                [book['url'] for book in filter(lambda x: x['good'], book_list)])))
            logger.info("\n" + _("Unsuccessful:") + "\n%s\n" % ("\n".join(
                [book['url'] for book in filter(lambda x: not x['good'], book_list)])))
            break

    server.close()

    # return the book list as the job result
    return book_list
def do_download_worker(book_list, options, cpus, merge=False,
                       notification=lambda x, y: x):
    '''
    Coordinator job, to launch child jobs to download a set of stories.
    This is run as a worker job in the background to keep the UI more
    responsive and get around the memory leak issues as it will launch
    a child job for each book as a worker process
    '''
    server = Server(pool_size=cpus)

    logger.info(options['version'])
    total = 0
    alreadybad = []

    # Queue all the jobs
    logger.info("Adding jobs for URLs:")
    for book in book_list:
        logger.info("%s" % book['url'])
        if book['good']:
            total += 1
            args = ['calibre_plugins.fanficfare_plugin.jobs',
                    'do_download_for_worker',
                    (book, options, merge)]
            job = ParallelJob('arbitrary_n',
                              "url:(%s) id:(%s)" % (book['url'], book['calibre_id']),
                              done=None,
                              args=args)
            job._book = book
            server.add_job(job)
        else:
            # was already bad before the subprocess ever started.
            alreadybad.append(book)

    # This server is an arbitrary_n job, so there is a notifier available.
    # Set the % complete to a small number to avoid the 'unavailable' indicator
    notification(0.01, _('Downloading FanFiction Stories'))

    # dequeue the job results as they arrive, saving the results
    count = 0
    while True:
        job = server.changed_jobs_queue.get()
        # A job can 'change' when it is not finished, for example if it
        # produces a notification. Ignore these.
        job.update()
        if not job.is_finished:
            continue
        # A job really finished. Get the information.
        book_list.remove(job._book)
        book_list.append(job.result)
        book_id = job._book['calibre_id']
        count = count + 1
        notification(float(count) / total,
                     _('%d of %d stories finished downloading') % (count, total))
        # Add this job's output to the current log
        logger.info('Logfile for book ID %s (%s)' % (book_id, job._book['title']))
        logger.info(job.details)

        if count >= total:
            book_list = sorted(book_list, key=lambda x: x['listorder'])
            logger.info("\n" + _("Download Results:") + "\n%s\n" % ("\n".join([
                "%(status)s %(url)s %(comment)s" % book for book in book_list])))

            good_lists = defaultdict(list)
            bad_lists = defaultdict(list)
            for book in book_list:
                if book['good']:
                    good_lists[book['status']].append(book)
                else:
                    bad_lists[book['status']].append(book)

            order = [_('Add'),
                     _('Update'),
                     _('Meta'),
                     _('Different URL'),
                     _('Rejected'),
                     _('Skipped'),
                     _('Bad'),
                     _('Error'),
                     ]
            j = 0
            for d in [good_lists, bad_lists]:
                for status in order:
                    if d[status]:
                        l = d[status]
                        logger.info("\n" + status + "\n%s\n" %
                                    ("\n".join([book['url'] for book in l])))
                        for book in l:
                            book['reportorder'] = j
                            j += 1
                        del d[status]
                # just in case a status is added but doesn't appear in order.
                for status in d.keys():
                    logger.info("\n" + status + "\n%s\n" %
                                ("\n".join([book['url'] for book in d[status]])))
            break

    server.close()

    # return the book list as the job result
    return book_list
def do_download_worker(book_list, options, cpus, merge=False,
                       notification=lambda x, y: x):
    '''
    Coordinator job, to launch child jobs to do downloads.
    This is run as a worker job in the background to keep the UI more
    responsive and get around any memory leak issues as it will launch
    a child job for each book as a worker process
    '''
    ## Now running one BG proc per site, which downloads for the same
    ## site in serial.
    logger.info("CPUs:%s" % cpus)
    server = Server(pool_size=cpus)

    logger.info(options['version'])

    sites_lists = defaultdict(list)
    [sites_lists[x['site']].append(x) for x in book_list if x['good']]

    totals = {}
    # can't do direct assignment in list comprehension? I'm sure it
    # makes sense to some pythonista.
    # [ totals[x['url']]=0.0 for x in book_list if x['good'] ]
    [totals.update({x['url']: 0.0}) for x in book_list if x['good']]
    # logger.debug(sites_lists.keys())

    # Queue all the jobs
    jobs_running = 0
    for site in sites_lists.keys():
        site_list = sites_lists[site]
        logger.info(_("Launch background process for site %s:") % site + "\n" +
                    "\n".join([x['url'] for x in site_list]))
        # logger.debug([ x['url'] for x in site_list])
        args = ['calibre_plugins.fanficfare_plugin.jobs',
                'do_download_site',
                (site, site_list, options, merge)]
        job = ParallelJob('arbitrary_n',
                          "site:(%s)" % site,
                          done=None,
                          args=args)
        job._site_list = site_list
        job._processed = False
        server.add_job(job)
        jobs_running += 1

    # This server is an arbitrary_n job, so there is a notifier available.
    # Set the % complete to a small number to avoid the 'unavailable' indicator
    notification(0.01, _('Downloading FanFiction Stories'))

    # dequeue the job results as they arrive, saving the results
    count = 0
    while True:
        job = server.changed_jobs_queue.get()
        # logger.debug("job get job._processed:%s"%job._processed)
        # A job can 'change' when it is not finished, for example if it
        # produces a notification.
        msg = None
        try:
            ## msg = book['url']
            (percent, msg) = job.notifications.get_nowait()
            # logger.debug("%s<-%s"%(percent,msg))
            if percent == 10.0:  # Only when signaling d/l done.
                count += 1
                totals[msg] = 1.0 / len(totals)
                # logger.info("Finished: %s"%msg)
            else:
                totals[msg] = percent / len(totals)
            notification(max(0.01, sum(totals.values())),
                         _('%(count)d of %(total)d stories finished downloading') %
                         {'count': count, 'total': len(totals)})
        except Empty:
            pass
        # without update, is_finished will never be set. however, we
        # do want to get all the notifications for status so we don't
        # miss the 'done' ones.
        job.update(consume_notifications=False)

        # if not job._processed:
        #     sleep(0.5)
        ## Can have a race condition where job.is_finished before
        ## notifications for all downloads have been processed.
        ## Or even after the job has been finished.
        # logger.debug("job.is_finished(%s) or job._processed(%s)"%(job.is_finished, job._processed))
        if not job.is_finished:
            continue

        ## only process each job once. We can get more than one loop
        ## after job.is_finished.
        if not job._processed:
            # sleep(1)
            # A job really finished. Get the information.

            ## This is where bg proc details end up in GUI log.
            ## job.details is the whole debug log for each proc.
            logger.info("\n\n" + ("=" * 80) + " " + job.details.replace('\r', ''))
            # logger.debug("Finished background process for site %s:\n%s"%(job._site_list[0]['site'],"\n".join([ x['url'] for x in job._site_list ])))
            for b in job._site_list:
                book_list.remove(b)
            book_list.extend(job.result)
            job._processed = True
            jobs_running -= 1

        ## Can't use individual count--I've seen stories all reported
        ## finished before results of all jobs processed.
        if jobs_running == 0:
            book_list = sorted(book_list, key=lambda x: x['listorder'])
            logger.info("\n" + _("Download Results:") + "\n%s\n" % ("\n".join([
                "%(status)s %(url)s %(comment)s" % book for book in book_list])))

            good_lists = defaultdict(list)
            bad_lists = defaultdict(list)
            for book in book_list:
                if book['good']:
                    good_lists[book['status']].append(book)
                else:
                    bad_lists[book['status']].append(book)

            order = [_('Add'),
                     _('Update'),
                     _('Meta'),
                     _('Different URL'),
                     _('Rejected'),
                     _('Skipped'),
                     _('Bad'),
                     _('Error'),
                     ]
            j = 0
            for d in [good_lists, bad_lists]:
                for status in order:
                    if d[status]:
                        l = d[status]
                        logger.info("\n" + status + "\n%s\n" %
                                    ("\n".join([book['url'] for book in l])))
                        for book in l:
                            book['reportorder'] = j
                            j += 1
                        del d[status]
                # just in case a status is added but doesn't appear in order.
                for status in d.keys():
                    logger.info("\n" + status + "\n%s\n" %
                                ("\n".join([book['url'] for book in d[status]])))
            break

    server.close()

    # return the book list as the job result
    return book_list
def do_extract_worker(books_to_scan, failed_ids, no_format_ids,
                      cpus, notification=lambda x,y:x):
    '''
    Master job, to launch child jobs to extract ISBN for a set of books
    This is run as a worker job in the background to keep the UI more
    responsive and get around the memory leak issues as it will launch
    a child job for each book as a worker process
    '''
    server = Server(pool_size=cpus)

    # Queue all the jobs
    for book_id, title, modified_date, existing_isbn, paths_for_formats in books_to_scan:
        args = ['calibre_plugins.extract_isbn.jobs',
                'do_extract_isbn_for_book_worker',
                (title, paths_for_formats)]
        job = ParallelJob('arbitrary', str(book_id), done=None, args=args)
        job._book_id = book_id
        job._title = title
        job._modified_date = modified_date
        job._existing_isbn = existing_isbn
        server.add_job(job)

    # This server is an arbitrary_n job, so there is a notifier available.
    # Set the % complete to a small number to avoid the 'unavailable' indicator
    notification(0.01, 'Extracting ISBN')

    # dequeue the job results as they arrive, saving the results
    total = len(books_to_scan)
    count = 0
    extracted_ids, same_isbn_ids = [], []
    while True:
        job = server.changed_jobs_queue.get()
        # A job can 'change' when it is not finished, for example if it
        # produces a notification. Ignore these.
        job.update()
        if not job.is_finished:
            continue
        # A job really finished. Get the information.
        isbn = job.result
        book_id = job._book_id
        title = job._title
        count = count + 1
        notification(float(count)/total, 'Extracted ISBN')
        # Add this job's output to the current log
        print('Logfile for book ID %d (%s)'%(book_id, title))
        print(job.details)
        if isbn:
            if job._existing_isbn == isbn:
                print('  Identical ISBN extracted of: %s'%(isbn,))
                same_isbn_ids.append((book_id, title))
            else:
                print('  New ISBN extracted of: %s'%(isbn,))
                extracted_ids.append((book_id, title, job._modified_date, isbn))
        else:
            print('  Failed to extract ISBN')
            failed_ids.append((book_id, title))
        print('===================================================')

        if count >= total:
            # All done!
            break

    server.close()

    # return the map as the job result
    return extracted_ids, same_isbn_ids, failed_ids, no_format_ids
def do_count_statistics(books_to_scan, pages_algorithm, use_goodreads,
                        nltk_pickle, cpus, notification=lambda x, y: x):
    """
    Master job, to launch child jobs to count pages in this list of books
    """
    server = Server(pool_size=cpus)

    # Queue all the jobs
    for book_id, title, book_path, goodreads_id, statistics_to_run in books_to_scan:
        args = ["calibre_plugins.count_pages.jobs",
                "do_statistics_for_book",
                (book_path, pages_algorithm, goodreads_id, use_goodreads,
                 statistics_to_run, nltk_pickle)]
        job = ParallelJob("arbitrary", str(book_id), done=None, args=args)
        job._book_id = book_id
        job._title = title
        job._pages_algorithm = pages_algorithm
        job._goodreads_id = goodreads_id
        job._use_goodreads = use_goodreads
        job._statistics_to_run = statistics_to_run
        server.add_job(job)

    # This server is an arbitrary_n job, so there is a notifier available.
    # Set the % complete to a small number to avoid the 'unavailable' indicator
    notification(0.01, "Counting Statistics")

    # dequeue the job results as they arrive, saving the results
    total = len(books_to_scan)
    count = 0
    book_stats_map = dict()
    while True:
        job = server.changed_jobs_queue.get()
        # A job can 'change' when it is not finished, for example if it
        # produces a notification. Ignore these.
        job.update()
        if not job.is_finished:
            continue
        # A job really finished. Get the information.
        results = job.result
        book_id = job._book_id
        book_stats_map[book_id] = results
        count = count + 1
        notification(float(count) / total, "Counting Statistics")

        # Add this job's output to the current log
        print("-------------------------------")
        print("Logfile for book ID %d (%s)" % (book_id, job._title))
        for stat in job._statistics_to_run:
            if stat == cfg.STATISTIC_PAGE_COUNT:
                if job._use_goodreads:
                    if job._goodreads_id is not None:
                        if stat in results and results[stat]:
                            print("\tGoodreads edition has %d pages" % results[stat])
                        else:
                            print("\tFAILED TO GET PAGE COUNT FROM GOODREADS")
                else:
                    if stat in results and results[stat]:
                        print("\tFound %d pages" % results[stat])
            elif stat == cfg.STATISTIC_WORD_COUNT:
                if stat in results and results[stat]:
                    print("\tFound %d words" % results[stat])
            elif stat == cfg.STATISTIC_FLESCH_READING:
                if stat in results and results[stat]:
                    print("\tComputed %.1f Flesch Reading" % results[stat])
            elif stat == cfg.STATISTIC_FLESCH_GRADE:
                if stat in results and results[stat]:
                    print("\tComputed %.1f Flesch-Kincaid Grade" % results[stat])
            elif stat == cfg.STATISTIC_GUNNING_FOG:
                if stat in results and results[stat]:
                    print("\tComputed %.1f Gunning Fog Index" % results[stat])
        print(job.details)

        if count >= total:
            # All done!
            break

    server.close()

    # return the map as the job result
    return book_stats_map
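All of the snippets above instantiate the same pattern for calibre's out-of-process job runner: create a Server, queue ParallelJob instances, poll until every job reports is_finished, collect job.result, then close the server. The sketch below distills that skeleton under stated assumptions: the import paths are the calibre.utils.ipc ones used by the plugins above, but 'some_plugin.jobs' and 'do_one_payload' are hypothetical placeholders for a real worker module and function, and the done-set guard is a defensive choice (the per-site FanFicFare variant above notes a finished job can surface from changed_jobs_queue more than once).

# Minimal sketch of the shared Server/ParallelJob pattern.
# 'some_plugin.jobs' and 'do_one_payload' are hypothetical placeholders;
# substitute a real (module, function, args) triple as in the code above.
from calibre.utils.ipc.job import ParallelJob
from calibre.utils.ipc.server import Server


def run_jobs(payloads, notification=lambda x, y: x):
    server = Server(pool_size=2)
    total = len(payloads)
    for i, payload in enumerate(payloads):
        # An 'arbitrary' job imports the named module in a worker process
        # and calls the named function with the given argument tuple.
        args = ['some_plugin.jobs', 'do_one_payload', (payload,)]
        job = ParallelJob('arbitrary', 'Payload %d of %d' % (i + 1, total),
                          done=None, args=args)
        server.add_job(job)

    # Set the % complete to a small number to avoid the 'unavailable'
    # indicator in the GUI, as the workers above do.
    notification(0.01, 'Processing')

    results, done = [], set()
    while len(done) < total:
        # Blocks until some job changes state; a job can 'change' without
        # being finished (e.g. when it produces a notification), and a
        # finished job can appear more than once, hence the 'done' set.
        job = server.changed_jobs_queue.get()
        job.update()
        if not job.is_finished or job in done:
            continue
        done.add(job)
        results.append(job.result)
        notification(float(len(done)) / total,
                     '%d of %d done' % (len(done), total))

    server.close()
    return results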