def get_status(self):
    """Return current transfer progress as a TaskStatus (0-100 percent).

    Returns 0% when there is no active connection yet.  Also guards
    against ``bytes_total == 0`` (empty or not-yet-known transfer size),
    which previously raised ZeroDivisionError.
    """
    if not self.connection:
        return TaskStatus(0)

    total = self.connection.bytes_total
    if not total:
        # Nothing to transfer (or size unknown) -- report 0% instead of
        # dividing by zero.
        return TaskStatus(0)

    percentage = (self.connection.bytes_done / total) * 100
    return TaskStatus(percentage)
def abort(self, id, error=None):
    """Abort a queued or running task.

    :param id: task id to abort (name kept for API compatibility even
        though it shadows the builtin ``id``).
    :param error: optional serializable error; when given, an aborted
        not-yet-started task is marked FAILED instead of ABORTED.
    """
    task = self.get_task(id)
    if not task:
        self.logger.warning("Cannot abort task: unknown task id %d", id)
        return

    success = False
    if task.started_at is None:
        # Task never started executing -- we can finalize it directly.
        success = True
    else:
        # Best-effort abort of the running executor.  Failures are now
        # logged instead of being silently swallowed by a bare
        # `except: pass` (which also hid KeyboardInterrupt/SystemExit).
        try:
            task.executor.abort()
        except BaseException:
            self.logger.exception("Error aborting executor of task %d", id)

    if success:
        task.ended.set()
        if error:
            task.set_state(TaskState.FAILED, TaskStatus(0), serialize_error(error))
            self.logger.debug("Task ID: %d, name: %s aborted with error", task.id, task.name)
        else:
            task.set_state(TaskState.ABORTED, TaskStatus(0, "Aborted"))
            self.logger.debug("Task ID: %d, name: %s aborted by user", task.id, task.name)
def set_state(self, state=None, progress=None, error=None):
    """Update task state and/or progress under the state lock and
    broadcast the change as dispatcher events.

    :param state: new TaskState value; None leaves state untouched.
    :param progress: new TaskStatus; ignored once the task is terminal.
    :param error: serialized error stored on the task when given.
    """
    with self.slock:
        if state:
            self.state = state
            event = {'id': self.id, 'name': self.name, 'state': self.state}
            if error:
                self.error = error
            if self.state == TaskState.EXECUTING:
                # Record the start time only on the first transition to
                # EXECUTING.
                if not self.started_at:
                    self.started_at = datetime.utcnow()
                event['started_at'] = self.started_at
            if self.state == TaskState.FINISHED:
                self.finished_at = datetime.utcnow()
                # Finished tasks are always reported as 100% done.
                self.progress = TaskStatus(100)
                event['finished_at'] = self.finished_at
                event['result'] = self.result
            if self.state in (TaskState.FAILED, TaskState.ABORTED):
                self.progress = TaskStatus(0)
            self.dispatcher.dispatch_event(
                'task.created' if self.state == TaskState.CREATED else 'task.updated',
                event)
            self.dispatcher.datastore_log.update('tasks', self.id, self)
            self.dispatcher.dispatch_event(
                'task.changed', {
                    'operation': 'create' if state == TaskState.CREATED else 'update',
                    'ids': [self.id]
                })
        # Progress updates are dropped once the task reached a terminal
        # state, so late reports cannot overwrite the final 100%/0%.
        if progress and self.state not in (TaskState.FINISHED, TaskState.FAILED, TaskState.ABORTED):
            self.progress = progress
            self.__emit_progress()
        if self.state in (TaskState.FINISHED, TaskState.FAILED, TaskState.ABORTED):
            try:
                # Remove all subtasks
                for i in filter(lambda t: t.parent is self, self.balancer.task_list):
                    self.balancer.task_list.remove(i)

                # If top-level task, also remove self
                if self.parent is None:
                    self.balancer.task_list.remove(self)
            except ValueError:
                # failed in verify stage
                pass
def get_status(self):
    """Fetch the current TaskStatus from the remote task proxy.

    Returns None when there is no connection yet, or when the RPC call
    fails (in which case the unresponsive executor process is killed).
    """
    if not self.conn:
        return None

    status = TaskStatus(0)
    try:
        status.__setstate__(self.conn.call_client_sync('taskproxy.get_status'))
    except RpcException as err:
        self.balancer.logger.error("Cannot obtain status from task #{0}: {1}".format(self.task.id, str(err)))
        self.proc.terminate()
        return None

    return status
def get_status(self):
    """Fetch the current TaskStatus from the worker's task proxy.

    Blocks until the worker transitions to EXECUTING, then performs a
    synchronous RPC.  On RPC failure the worker is terminated and None
    is returned implicitly.

    NOTE(review): the flattened source is ambiguous about whether the
    RPC happens while still holding ``self.cv`` -- the nesting below is
    a best guess; confirm against the original file.
    """
    with self.cv:
        # Status is only meaningful once the worker is executing a task.
        self.cv.wait_for(lambda: self.state == WorkerState.EXECUTING)
        try:
            st = TaskStatus(0)
            st.__setstate__(self.conn.call_sync('taskproxy.get_status'))
            return st
        except RpcException as err:
            self.balancer.logger.error(
                "Cannot obtain status from task #{0}: {1}".format(self.task.id, str(err))
            )
            self.terminate()
def get_status(self):
    """Fetch the current TaskStatus, aggregating subtask progress for
    MasterProgressTask subclasses.

    For master tasks, progress is read from the proxy, any pending
    increment is folded in and written back, then weighted progress of
    active subtasks is added.  For plain tasks the proxy state is used
    directly.  Returns None if there is no connection; on RPC error the
    executor process is terminated and None is returned implicitly.
    """
    if not self.conn:
        return None
    try:
        st = TaskStatus(0)
        if issubclass(self.task.clazz, MasterProgressTask):
            progress_subtask_info = self.conn.call_client_sync(
                'taskproxy.get_master_progress_info'
            )
            if progress_subtask_info['increment_progress'] != 0:
                # Consume the pending increment and persist the zeroed
                # counter back to the proxy.
                progress_subtask_info['progress'] += progress_subtask_info['increment_progress']
                progress_subtask_info['increment_progress'] = 0
                self.conn.call_client_sync(
                    'taskproxy.set_master_progress_detail',
                    {
                        'progress': progress_subtask_info['progress'],
                        'increment_progress': progress_subtask_info['increment_progress']
                    }
                )
            if progress_subtask_info['active_tids']:
                progress_to_increment = 0
                # NOTE: 'concurent' is the (misspelled) key used by the
                # remote side -- do not "fix" it here.
                concurent_weight = progress_subtask_info['concurent_subtask_detail']['average_weight']
                # Concurrent subtasks: weight by both the average
                # concurrency weight and the per-subtask weight.
                for tid in progress_subtask_info['concurent_subtask_detail']['tids']:
                    subtask_status = self.balancer.get_task(tid).executor.get_status()
                    progress_to_increment += subtask_status.percentage * concurent_weight * \
                        progress_subtask_info['subtask_weights'][str(tid)]
                # Remaining active subtasks (not in the concurrent set):
                # weight by the per-subtask weight only.
                for tid in set(progress_subtask_info['active_tids']).symmetric_difference(
                    set(progress_subtask_info['concurent_subtask_detail']['tids'])
                ):
                    subtask_status = self.balancer.get_task(tid).executor.get_status()
                    progress_to_increment += subtask_status.percentage * \
                        progress_subtask_info['subtask_weights'][str(tid)]
                progress_subtask_info['progress'] += int(progress_to_increment)
                if progress_subtask_info['pass_subtask_details']:
                    # NOTE(review): uses whichever subtask_status was
                    # bound last by the loops above; if both tid sets
                    # were empty this would raise NameError -- confirm
                    # against the original intent.
                    progress_subtask_info['message'] = subtask_status.message
            st = TaskStatus(
                progress_subtask_info['progress'],
                progress_subtask_info['message']
            )
        else:
            st.__setstate__(self.conn.call_client_sync('taskproxy.get_status'))
        return st
    except RpcException as err:
        self.balancer.logger.error(
            "Cannot obtain status from task #{0}: {1}".format(self.task.id, str(err))
        )
        self.proc.terminate()
def __init__(self, dispatcher, name=None):
    """Create a new task record in CREATED state.

    :param dispatcher: dispatcher instance; also supplies the balancer.
    :param name: optional task name.
    """
    self.dispatcher = dispatcher
    self.balancer = dispatcher.balancer
    self.created_at = None
    self.started_at = None        # set by set_state() on first EXECUTING
    self.finished_at = None       # set by set_state() on FINISHED
    self.id = None                # assigned elsewhere (not in this chunk)
    self.name = name
    self.clazz = None             # task implementation class
    self.args = None
    self.user = None
    self.session_id = None
    self.error = None             # serialized error, set on failure
    self.state = TaskState.CREATED
    self.description = None
    self.progress = TaskStatus(None)
    self.resources = []           # resource list returned by verify()
    self.warnings = []
    self.environment = {}
    self.hooks = {}
    self.instance = None          # instantiated task object
    self.parent = None            # parent task; None for top-level tasks
    self.result = None
    self.output = ''
    self.rusage = None
    self.slock = RLock()          # guards state transitions in set_state()
    self.ended = Event()          # set once the task reaches a terminal state
    self.debugger = None
    self.executor = None
    self.strict_verify = None
def run(self):
    """Execute the task on a node from the pool.

    Retries on a (possibly different) node up to 10 times until one
    execution succeeds; a non-None result is appended to the manager.
    """
    pool = NodePool.get_instance()
    status = TaskStatus()

    # 10 times retry maximum until success
    for _attempt in range(10):
        node = pool.get_node(self.timeout)
        status = node.execute(self.task)
        pool.return_node(node)

        if self.debug:
            print(status.to_json())

        if status.code != TaskStatus.CODE_EXECUTION_SUCCESS:
            continue

        if status.result is not None:
            self.manager.append(status.result)
        break
def execute(self, task, args=None):
    """Run *task* locally and wrap the outcome in a TaskStatus.

    :param task: task object exposing ``execute`` and ``args``.
    :param args: optional argument override recorded on the status.
    :returns: TaskStatus with execution code, message and result.
    """
    status = TaskStatus()
    status.id = 'LOCAL-' + task.id
    # Record the effective arguments on the status for later inspection.
    status.args = task.args if args is None else args

    try:
        outcome = task.execute(args)
    except Exception as exc:
        status.code = TaskStatus.CODE_EXECUTION_FAILED
        status.message = str(exc)
    else:
        status.code = TaskStatus.CODE_EXECUTION_SUCCESS
        status.message = "OK"
        status.result = outcome

    return status
def get_status(self):
    """Report scrub task status.

    Returns a waiting placeholder before the task starts.  Translates
    libzfs errors into TaskException.

    BUG FIX: the original used Python 2 ``except X, err`` syntax, which
    is a SyntaxError under Python 3; changed to ``except X as err``.

    NOTE(review): ``scrub`` is assigned but unused here -- this function
    appears truncated (compare the fuller variant elsewhere in this
    file) and falls through returning None once started.
    """
    if not self.started:
        return TaskStatus(0, "Waiting to start...")
    try:
        zfs = libzfs.ZFS()
        pool = zfs.get(self.pool)
        scrub = pool.scrub
    except libzfs.ZFSException as err:
        raise TaskException(errno.EFAULT, str(err))
def distribution_thread(self):
    """Main task-distribution loop: verify queued tasks and hand them to
    the scheduler.

    Runs forever.  For each task popped from the queue: validate the
    argument schema, instantiate the task class and call its verify();
    on any failure mark the task FAILED and continue, otherwise mark it
    WAITING and trigger scheduling.  ``distribution_lock`` is held from
    queue pop until the task is either rejected or appended to
    ``task_list`` -- note the explicit release on both paths.
    """
    while True:
        # Block until something is available, then take the lock before
        # actually removing the task from the queue.
        self.task_queue.peek()
        self.distribution_lock.acquire()
        task = self.task_queue.get()

        try:
            self.logger.debug("Picked up task %d: %s with args %s", task.id, task.name, task.args)

            errors = self.verify_schema(self.dispatcher.tasks[task.name], task.args, task.strict_verify)
            if len(errors) > 0:
                errors = list(validator.serialize_errors(errors))
                self.logger.warning(
                    "Cannot submit task {0}: schema verification failed with errors {1}"
                    .format(task.name, errors))
                raise ValidationException(extra=errors)

            task.instance = task.clazz(self.dispatcher, self.dispatcher.datastore)
            task.resources = task.instance.verify(*task.args)
            task.description = task.instance.describe(*task.args)

            if type(task.resources) is not list:
                raise ValueError(
                    "verify() returned something else than resource list")

        except Exception as err:
            # Any verification failure ends the task immediately.
            self.logger.warning("Cannot verify task %d: %s", task.id, err)
            task.set_state(TaskState.FAILED, TaskStatus(0), serialize_error(err))
            task.ended.set()
            self.distribution_lock.release()

            # Only VerifyException is an "expected" verify failure;
            # anything else is reported as a task implementation bug.
            if not isinstance(err, VerifyException):
                self.dispatcher.report_error(
                    'Task {0} verify() method raised invalid exception'.
                    format(err), err)

            continue

        task.set_state(TaskState.WAITING)
        self.task_list.append(task)

        self.distribution_lock.release()
        self.schedule_tasks()
        if task.resources:
            self.logger.debug("Task %d assigned to resources %s", task.id, ','.join(task.resources))
def put_progress(self, progress):
    """Apply a serialized progress payload to the owning task.

    :param progress: state dict consumed by ``TaskStatus.__setstate__``.
    """
    status = TaskStatus(None)
    status.__setstate__(progress)
    self.task.set_state(progress=status)
def handle(conn):
    """Handle one task-execution request on an accepted socket.

    Protocol: receive a pickled task, acknowledge with a request status,
    execute the task, then send a second status carrying the result (or
    the failure message).  The connection is always closed at the end.

    BUG FIX: the original read ``task.args`` unconditionally, raising
    NameError when unpickling failed and ``task`` was never bound.
    Bare ``except`` clauses were also narrowed to ``except Exception``.
    """
    # receiving task object
    request = conn.recv(1024)
    task = None
    try:
        # SECURITY: pickle.loads on data from the network can execute
        # arbitrary code -- only use this handler with trusted peers.
        task = pickle.loads(request)
        status_code, status_message, task_id = TaskStatus.CODE_REQUEST_SUCCESS, 'OK', task.id
    except Exception as e:
        status_code, status_message, task_id = TaskStatus.CODE_REQUEST_FAILED, str(e), None

    # send status
    status = TaskStatus(status_code, status_message)
    status.id = task_id
    status.args = task.args if task is not None else None
    try:
        conn.sendall(status.serialize())
    except Exception as e:
        status_code, status_message, task_id = TaskStatus.CODE_REQUEST_FAILED, str(e), None

    # execute task
    if status_code == TaskStatus.CODE_REQUEST_SUCCESS:
        try:
            result_data = task.execute()
            status.code = TaskStatus.CODE_EXECUTION_SUCCESS
            status.result = result_data
            conn.sendall(status.serialize())
            print(task_id, 'success')
        except Exception as e:
            # Report the failure back; give up silently if even that
            # send fails (peer is gone).
            try:
                status.code = TaskStatus.CODE_EXECUTION_FAILED
                status.message = str(e)
                conn.sendall(status.serialize())
            except Exception:
                pass

    conn.close()
def run(self, task):
    """Run *task* on this worker's out-of-process executor.

    Waits for assignment, locates the plugin file defining the task
    class, starts it over RPC, then blocks on the async result and
    records FINISHED / FAILED / ABORTED on the task accordingly.

    NOTE(review): the flattened source is ambiguous about the exact
    nesting of the ``with self.cv`` sections; the layout below is a
    best-effort reconstruction -- confirm against the original file.
    """
    def match_file(module, f):
        # A plugin file matches if its basename equals the module name
        # and it is a python source, bytecode or extension file.
        name, ext = os.path.splitext(f)
        return module == name and ext in ['.py', '.pyc', '.so']

    with self.cv:
        self.cv.wait_for(lambda: self.state == WorkerState.ASSIGNED)
        self.result = AsyncResult()
        self.task = task
        self.task.set_state(TaskState.EXECUTING)
        self.state = WorkerState.EXECUTING
        self.cv.notify_all()

    self.balancer.logger.debug('Actually starting task {0}'.format(
        task.id))

    # Find the file that defines the task class by scanning plugin dirs.
    filename = None
    module_name = inspect.getmodule(task.clazz).__name__
    for dir in self.balancer.dispatcher.plugin_dirs:
        found = False
        try:
            for root, _, files in os.walk(dir):
                file = first_or_default(
                    lambda f: match_file(module_name, f), files)
                if file:
                    filename = os.path.join(root, file)
                    found = True
                    break

            if found:
                break
        except OSError:
            # Unreadable plugin directory -- try the next one.
            continue

    try:
        self.conn.call_sync(
            'taskproxy.run', {
                'id': task.id,
                'user': task.user,
                'class': task.clazz.__name__,
                'filename': filename,
                'args': task.args,
                'debugger': task.debugger,
                'environment': task.environment,
                'hooks': task.hooks,
            })
    except RpcException as e:
        self.balancer.logger.warning(
            'Cannot start task {0} on executor #{1}: {2}'.format(
                task.id, self.index, str(e)))
        self.balancer.logger.warning(
            'Killing unresponsive task executor #{0} (pid {1})'.format(
                self.index, self.proc.pid))
        self.terminate()

    try:
        self.result.get()
    except BaseException as e:
        # Task raised: classify the exception and mark the task.
        if isinstance(e, OtherException):
            self.balancer.dispatcher.report_error(
                'Task {0} raised invalid exception'.format(self.task.name), e)

        if isinstance(e, TaskAbortException):
            self.task.set_state(TaskState.ABORTED, TaskStatus(0, 'aborted'))
        else:
            self.task.error = serialize_error(e)
            self.task.set_state(
                TaskState.FAILED,
                TaskStatus(0, str(e), extra={"stacktrace": traceback.format_exc()}))

        with self.cv:
            self.task.ended.set()
            if self.state == WorkerState.EXECUTING:
                self.state = WorkerState.IDLE
                self.cv.notify_all()

        self.balancer.task_exited(self.task)
        return

    # Success path: publish the result and return the worker to IDLE.
    with self.cv:
        self.task.result = self.result.value
        self.task.set_state(TaskState.FINISHED,
                            TaskStatus(100, ''))
        self.task.ended.set()
        if self.state == WorkerState.EXECUTING:
            self.state = WorkerState.IDLE
            self.cv.notify_all()

    self.balancer.task_exited(self.task)
def get_status(self):
    """Report disk-erase progress as a TaskStatus.

    BUG FIX: the original returned ``remaining / mediasize`` -- a 0-1
    fraction of work *left* -- while TaskStatus percentages elsewhere in
    this file are 0-100 percent *done* (e.g. ``TaskStatus(100)`` on
    completion).  Converted to ``(1 - remaining/mediasize) * 100``.
    """
    if not self.started:
        return TaskStatus(0, 'Erasing disk...')

    # `remaining` counts down from `mediasize`; convert to percent done.
    done_fraction = 1 - (self.remaining / self.mediasize)
    return TaskStatus(done_fraction * 100, 'Erasing disk...')
return True def get_status(self): if not self.started: return TaskStatus(0, "Waiting to start...") try: zfs = libzfs.ZFS() pool = zfs.get(self.pool) scrub = pool.scrub except libzfs.ZFSException, err: raise TaskException(errno.EFAULT, str(err)) if scrub.state == libzfs.ScanState.SCANNING: self.progress = scrub.percentage return TaskStatus(self.progress, "In progress...") if scrub.state == libzfs.ScanState.CANCELED: self.finish_event.set() return TaskStatus(self.progress, "Canceled") if scrub.state == libzfs.ScanState.FINISHED: self.finish_event.set() return TaskStatus(100, "Finished") def verify(self, pool): zfs = libzfs.ZFS() pool = zfs.get(pool) return get_disk_names(self.dispatcher, pool)
def execute(self, task, args=None):
    """Send *task* to the remote node and wait for its result status.

    Protocol: connect, send the serialized task, read the request
    acknowledgement, then (on success) read the execution result; every
    received payload is unpickled into a TaskStatus.

    :param task: task to execute; ``task.args`` is overwritten when
        *args* is given.
    :returns: TaskStatus tagged with this node's host and the task id.

    FIXES: removed the unused ``temp`` variable; narrowed the close
    handler's intent with a comment (closing is redundant under
    ``with`` but kept for behavioral parity).
    """
    # set arguments
    if args is not None:
        task.args = args

    # initialize status
    status = TaskStatus()
    status.host = self.host

    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        # sending task request; short timeout for the handshake, then
        # the configured timeout for the (longer) execution phase.
        try:
            s.connect((self.host, self.port))
            s.settimeout(1)
            status.id = task.id
            s.sendall(task.serialize())
            data = s.recv(1024)
            s.settimeout(self.timeout)
        except Exception as e:
            status.code = TaskStatus.CODE_REQUEST_TIME_OUT
            status.message = str(e)
            return status

        try:
            # SECURITY: pickle.loads on network data can execute
            # arbitrary code -- only connect to trusted nodes.
            status = pickle.loads(data)
            status.host = self.host
            status.id = task.id
            if status.code == TaskStatus.CODE_REQUEST_SUCCESS:
                data = s.recv(2048)
                status = pickle.loads(data)
                status.host = self.host
                status.id = task.id
        except Exception as e:
            status.code = TaskStatus.CODE_SERVER_ERROR
            status.message = str(e)

        try:
            # Redundant: `with` closes the socket; kept for parity with
            # the original (a failed close only prints).
            s.close()
        except Exception as e:
            print(str(e))

    return status