def run(self): self.logger = log.get_logger('Task') self.task_api = TaskApi() try: result = self.task_api.query({ 'task_id': self.options.task_id }) except exceptions.ApiException as e: print e.message sys.exit(1) result = { k: str(v) if isinstance(v, SuperEnum.Element) else v for k, v in result.items() } print json.dumps(result, sort_keys=True, indent=4 * ' ')
def _save(self, finished=None, steps=None, result=None, data=None):
    """Persist this task execution's current state to its queue row.

    Any argument left as None falls back to the value already held on the
    instance.  The UPDATE only matches a row still owned by this execution
    (same id and execution_id) whose lease has not expired (last_contact
    within execution_ttl seconds of now); if no such row is found
    afterwards, TaskDoesNotExist is raised and the instance attributes are
    left untouched.

    :param finished: datetime when the task finished, or None.
    :param steps: step-state structure to store (JSON-encoded), or None.
    :param result: result value to store, or None.
    :param data: task data payload to store (JSON-encoded), or None.
    :raises TaskDoesNotExist: lease expired or row no longer owned by us.
    """
    # Fall back to the instance's current finished timestamp.
    finished = finished if finished is not None else self.finished
    with self.storage.transaction() as cursor:
        # Refresh the lease (last_contact) and write all mutable columns.
        apsw_helpers.query(cursor, '''
            UPDATE %s
            SET last_contact=datetime(:now, 'unixepoch'),
                update_count=update_count + 1,
                steps=:steps,
                finished=datetime(:finished, 'unixepoch'),
                result=:result,
                bytes_downloaded=:bytes_downloaded,
                download_rate=:download_rate,
                data=:data
            WHERE id = :task_id
              AND execution_id = :execution_id
              AND last_contact > datetime(:now, 'unixepoch', '-%s second')
        ''' % (self._queue.table_name, self._queue.execution_ttl),
            now=unix_timestamp(datetime.utcnow()),
            task_id=self.task_id,
            execution_id=self.execution_id,
            steps=json.dumps(steps if steps is not None else self.steps),
            finished=unix_timestamp(finished) if finished else None,
            result=result if result is not None else self.result,
            bytes_downloaded=self.bytes_downloaded,
            download_rate=self.download_rate,
            data=json.dumps(data if data is not None else self.data))
        # Re-select to confirm the UPDATE hit a live, still-owned row
        # (the UPDATE call itself does not report an affected-row count).
        affected_row = apsw_helpers.get(cursor, '''
            SELECT * from %s
            WHERE id = :task_id
              AND execution_id = :execution_id
              AND last_contact > datetime(:now, 'unixepoch', '-%s second')
        ''' % (self._queue.table_name, self._queue.execution_ttl),
            now=unix_timestamp(datetime.utcnow()),
            task_id=self.task_id,
            execution_id=self.execution_id)
    if not affected_row:
        raise TaskDoesNotExist()
    else:
        # Mirror the persisted values onto the instance only on success.
        if steps is not None:
            self.steps = steps
        if finished is not None:
            self.finished = finished
        if result is not None:
            self.result = result
        if data is not None:
            self.data = data
def run(self): self.logger = log.get_logger('Task') self.task_api = TaskApi() try: result = self.task_api.query({'task_id': self.options.task_id}) except exceptions.ApiException as e: print e.message sys.exit(1) result = { k: str(v) if isinstance(v, SuperEnum.Element) else v for k, v in result.items() } print json.dumps(result, sort_keys=True, indent=4 * ' ')
def format(self):
    """Render self.data in the configured format (JSON, text table, HTML).

    Returns a UTF-8 encoded byte string.
    """
    if self.tablefmt == TableFormat.JSON:
        # TODO(cary) Patch clark.super_enum to support JSON serialization
        rows = []
        for row in self.data:
            rows.append(dict(
                (k, str(v) if isinstance(v, SuperEnum.Element) else v)
                for k, v in row.iteritems()))
        return json.dumps(rows, sort_keys=True, indent=4 * ' ').encode('utf-8')
    else:
        ptable = PrettyTable(self.columns)
        for column, alignment in self.align.iteritems():
            ptable.align[column] = alignment
        for row in self.data:
            ptable.add_row([row[column] for column in self.columns])
        if self.tablefmt == TableFormat.TABLE:
            rendered = ptable.get_string(sortby=self.sort_by,
                                         reversesort=self.reverse_sort)
            return rendered.encode('utf-8')
        elif self.tablefmt == TableFormat.HTML:
            rendered = ptable.get_html_string(sortby=self.sort_by,
                                              reversesort=self.reverse_sort)
            return rendered.encode('utf-8')
def enqueue(self, data, job_id=None, file_id=None, md5=None, bytes_total=None):
    """ Enqueue task with specified data.

    :param data: JSON-serializable payload stored in the row's data column.
    :param job_id: optional ID of the job this task belongs to.
    :param file_id: optional identifier of the source file.
    :param md5: optional checksum of the source file.
    :param bytes_total: optional total size of the source file in bytes.
    :returns: the number of rows inserted (always 1).
    """
    jsonified_data = json.dumps(data)
    with self.storage.transaction() as cursor:
        # Use single quotes for the 'unixepoch' modifier: SQLite treats
        # double-quoted tokens as identifiers first (only falling back to
        # string literals as a misfeature), and every other query in this
        # module quotes it with single quotes.
        apsw_helpers.query(cursor, '''
            INSERT INTO %s (created, data, job_id, file_id, md5, bytes_total)
            VALUES (datetime(:now, 'unixepoch'), :data, :job_id,
                    :file_id, :md5, :bytes_total)
        ''' % self.table_name,
            now=unix_timestamp(datetime.utcnow()),
            data=jsonified_data,
            job_id=job_id,
            file_id=file_id,
            md5=md5,
            bytes_total=bytes_total)
    # Return the number of rows we inserted.
    return 1
def requeue(self):
    """Return this task to the queue so it can be claimed again.

    Refuses to requeue while any step is still running or once the task
    is finished.  Resets all execution-tracking columns (last_contact,
    started, steps, execution_id, finished, result) so the row looks
    freshly enqueued, and strips transient progress info from the data.

    :raises StepRunning: if a step is currently in progress.
    :raises AlreadyFinished: if the task has already finished.
    :raises TaskDoesNotExist: if this execution no longer owns a live row.
    """
    if self._running_steps() != 0:
        raise StepRunning()
    if self.finished is not None:
        raise AlreadyFinished()
    # Work on a copy so self.data is not mutated by the pop below.
    data = copy.deepcopy(self.data)
    self.bytes_downloaded = None
    self.download_rate = None
    # 'time_left' is transient progress state; a requeued task starts over.
    data.pop('time_left', None)
    with self._queue.storage.transaction() as cursor:
        # First verify we still own a live (non-expired) row; the check and
        # the reset run inside one transaction so ownership cannot change
        # in between.
        affected_row = apsw_helpers.get(cursor, '''
            SELECT * from %s
            WHERE id = :task_id
              AND execution_id = :execution_id
              AND last_contact > datetime(:now, 'unixepoch', '-%s second')
        ''' % (self._queue.table_name, self._queue.execution_ttl),
            now=unix_timestamp(datetime.utcnow()),
            task_id=self.task_id,
            execution_id=self.execution_id)
        if not affected_row:
            raise TaskDoesNotExist()
        # Clear execution state so the row becomes claimable again.  Note
        # the WHERE clause matches on the same execution_id/last_contact
        # values that the SET clause is about to NULL out.
        apsw_helpers.query(cursor, '''
            UPDATE %s
            SET last_contact=NULL,
                update_count=update_count + 1,
                started=NULL,
                steps=NULL,
                execution_id=NULL,
                finished=NULL,
                data=:data,
                result=NULL
            WHERE id = :task_id
              AND execution_id = :execution_id
              AND last_contact > datetime(:now, 'unixepoch', '-%s second')
        ''' % (self._queue.table_name, self._queue.execution_ttl),
            data=json.dumps(data),
            now=unix_timestamp(datetime.utcnow()),
            task_id=self.task_id,
            execution_id=self.execution_id)
def format(self):
    """Serialize self.data according to self.tablefmt.

    Returns a UTF-8 encoded byte string (JSON, plain table, or HTML).
    """
    if self.tablefmt == TableFormat.JSON:
        # TODO(cary) Patch clark.super_enum to support JSON serialization
        def jsonable(value):
            return str(value) if isinstance(value, SuperEnum.Element) else value

        printable_data = [dict((k, jsonable(v)) for k, v in row.iteritems())
                          for row in self.data]
        return json.dumps(printable_data, sort_keys=True,
                          indent=4 * ' ').encode('utf-8')
    else:
        table = PrettyTable(self.columns)
        for col, align in self.align.iteritems():
            table.align[col] = align
        for row in self.data:
            table.add_row([row[c] for c in self.columns])
        if self.tablefmt == TableFormat.TABLE:
            text = table.get_string(sortby=self.sort_by,
                                    reversesort=self.reverse_sort)
            return text.encode('utf-8')
        elif self.tablefmt == TableFormat.HTML:
            text = table.get_html_string(sortby=self.sort_by,
                                         reversesort=self.reverse_sort)
            return text.encode('utf-8')
def json_spec(self):
    """Return this object's spec serialized as a JSON string."""
    spec = self.spec
    return json.dumps(spec)
def run(self):
    """Look up a job by ID and print it (or just its spec) as JSON.

    With --spec, prints only the job spec.  Otherwise augments the job
    record with aggregate load statistics computed from its successfully
    finished tasks, plus the target database/table, and prints the whole
    thing.  Exits with status 1 on any API error.
    """
    self.logger = log.get_logger('Job')
    self.job_api = JobApi()
    self.tasks_api = TasksApi()
    try:
        result = self.job_api.query({'job_id': self.options.job_id})
    except exceptions.ApiException as e:
        print e.message
        sys.exit(1)
    if self.options.spec:
        print json.dumps(result.spec, sort_keys=True, indent=4 * ' ')
    else:
        # Only tasks in SUCCESS state contribute to the statistics.
        try:
            finished_tasks = self.tasks_api.query({
                'job_id': self.options.job_id,
                'state': 'SUCCESS'
            })
        except exceptions.ApiException as e:
            print e.message
            sys.exit(1)
        files_loaded = len(finished_tasks)
        # Sum row_count across tasks; tasks without it count as 0.
        rows_loaded = reduce(
            lambda x, y: x + y.get('data', {}).get('row_count', 0),
            finished_tasks, 0)
        avg_rows_per_file = None
        avg_rows_per_second = None
        if files_loaded > 0:
            # NOTE(review): Python 2 integer division — this truncates
            # when rows_loaded is not a multiple of files_loaded; confirm
            # whether a float average was intended.
            avg_rows_per_file = rows_loaded / files_loaded
            # Find the earliest download start and latest download stop
            # across all finished tasks to compute overall throughput.
            min_start_time = datetime.datetime.max
            max_stop_time = datetime.datetime.min
            for row in finished_tasks:
                for step in row.steps:
                    if step['name'] == 'download':
                        min_start_time = min(min_start_time, step['start'])
                        max_stop_time = max(max_stop_time, step['stop'])
                        break
                else:
                    # No 'download' step on this task; skip it.
                    continue
            # NOTE(review): if no finished task has a download step this
            # divides by a negative interval (max - min sentinels) —
            # presumably every SUCCESS task has one; verify.
            avg_rows_per_second = rows_loaded / (
                max_stop_time - min_start_time).total_seconds()
        # Drop stats that could not be computed (still None).
        result['stats'] = {
            k: v
            for k, v in {
                'files_loaded': files_loaded,
                'rows_loaded': rows_loaded,
                'avg_rows_per_file': avg_rows_per_file,
                'avg_rows_per_second': avg_rows_per_second
            }.iteritems() if v is not None
        }
        if result.tasks_total > 0:
            # * 1.0 forces float division under Python 2.
            result['stats'].update({
                'success_rate':
                    result.tasks_succeeded * 1.0 / result.tasks_total,
                'error_rate':
                    result.tasks_errored * 1.0 / result.tasks_total
            })
        # Surface the load target at the top level, then drop the full
        # spec from the printed output.
        result["database"] = result.spec["target"]["database"]
        result["table"] = result.spec["target"]["table"]
        result = dict(result)
        del result['spec']
        # SuperEnum elements are not JSON-serializable; stringify them.
        result = {
            k: str(v) if isinstance(v, SuperEnum.Element) else v
            for k, v in result.iteritems()
        }
        print json.dumps(result, sort_keys=True, indent=4 * ' ')