def to_dict(self, **kw):
    """
    Serialize this recipe into a plain dictionary.

    Keyword Args:
        incl_reports (bool): when True (the default) and the sous chef
            creates reports, include the recipe's report slugs.
    """
    include_reports = kw.get('incl_reports', True)
    serialized = {
        'id': self.id,
        'org_id': self.org_id,
        'sous_chef': self.sous_chef.slug,
        'name': self.name,
        'slug': self.slug,
        'description': self.description,
        'created': self.created,
        'updated': self.updated,
        'last_run': self.last_run,
        'schedule_by': self.schedule_by,
        'crontab': self.crontab,
        'time_of_day': self.time_of_day,
        'minutes': self.minutes,
        'status': self.status,
        'traceback': self.traceback,
        'last_job': self.last_job,
        # options are stored pickled; decode before returning.
        'options': pickle_to_obj(self.options),
    }
    creates = self.sous_chef.creates
    if 'metrics' in creates:
        serialized['metrics'] = self.metric_names
    if include_reports and 'report' in creates:
        serialized['reports'] = self.report_names
    return serialized
def bulkworker(job_id, **qkw):
    """
    Fetch a serialized bulk-load job from redis and execute it.

    :param job_id: id used to build the redis key via ``job_key_fmt``.
    :param qkw: queue configuration (``job_key_fmt``, ``serializer``,
        ``chunk_size``, ``max_workers``).
    :return: True on success.
    :raises InternalServerError: when the job payload is missing
        or the run times out.
    :raises RequestError: when the load itself fails.
    """
    start = time.time()
    try:
        k = qkw['job_key_fmt'].format(job_id)
        job = rds.get(k)
        if not job:
            raise InternalServerError(
                'An unexpected error occurred while processing bulk upload.'
            )
        if qkw['serializer'] == 'json':
            job = json_to_obj(job)
        elif qkw['serializer'] == 'pickle':
            job = pickle_to_obj(job)
        data = job.pop('data', [])
        job = job.pop('kw', {})

        # the payload has been consumed; remove it from redis.
        rds.delete(k)

        # chunk the list so each worker receives a batch.
        chunked_data = util.chunk_list(data, qkw.get('chunk_size'))

        # bind the shared kwargs into a partial function.
        load_fx = partial(ingest.source, **job)

        # pooled execution; iterate to exhaustion to force completion.
        pool = Pool(qkw.get('max_workers', MAX_WORKERS))
        for res in pool.imap_unordered(load_fx, chunked_data):
            pass
        return True

    # BUG FIX: JobTimeoutException subclasses Exception, so it must be
    # caught *before* the generic handler — previously this branch was
    # unreachable and timeouts surfaced as RequestError.
    except JobTimeoutException:
        end = time.time()
        raise InternalServerError(
            'Bulk loading timed out after {} seconds'
            .format(end - start))

    except Exception:
        tb = format_exc()
        # also fixes the "Ocurred" typo in the user-facing message.
        raise RequestError('An Error Occurred while running {}:\n{}'
                           .format(job_id, tb))
def run_sous_chef(sous_chef_path, recipe_id, kw_key):
    """
    Do the work. This exists outside the class in order to enable pickling.

    :param sous_chef_path: import path of the SousChef class to run.
    :param recipe_id: primary key of the Recipe to execute.
    :param kw_key: redis key holding the pickled kwargs for the run.
    :return: True on success, a ``MerlynneError`` instance on failure
        (NOTE(review): the error is returned, not raised — callers
        appear to rely on this).
    """
    recipe = db.session.query(Recipe).get(recipe_id)
    try:
        # load in kwargs
        kw = rds.get(kw_key)
        if not kw:
            raise InternalServerError(
                'An unexpected error occurred while attempting to run a Sous Chef.'
            )
        kw = pickle_to_obj(kw)
        # delete them.
        rds.delete(kw_key)
        # import sous chef
        SousChef = import_sous_chef(sous_chef_path)
        # initialize it with kwargs
        sc = SousChef(**kw)
        # cook it.
        sc.cook()
        # update status and next job from sous chef.
        recipe.status = "stable"
        recipe.traceback = None
        # if something is set on this object, add it.
        if len(sc.next_job.keys()):
            recipe.last_job = sc.next_job
        db.session.add(recipe)
        db.session.commit()
        return True
    except Exception as e:
        # BUG FIX: always clean up the cached kwargs on failure as well,
        # so stale payloads don't accumulate in redis (matches the
        # behavior of the sibling run functions in this file).
        rds.delete(kw_key)
        # keep track of the error.
        db.session.rollback()
        recipe.status = "error"
        recipe.traceback = format_exc()
        db.session.add(recipe)
        db.session.commit()
        return MerlynneError(e)
def bulkworker(job_id, **qkw):
    """
    Fetch a serialized bulk-load job from redis and execute it.

    :param job_id: id used to build the redis key via ``job_key_fmt``.
    :param qkw: queue configuration (``job_key_fmt``, ``serializer``,
        ``chunk_size``, ``max_workers``).
    :return: True on success.
    :raises InternalServerError: when the job payload is missing
        or the run times out.
    :raises RequestError: when the load itself fails.
    """
    start = time.time()
    try:
        k = qkw['job_key_fmt'].format(job_id)
        job = rds.get(k)
        if not job:
            raise InternalServerError(
                'An unexpected error occurred while processing bulk upload.')
        if qkw['serializer'] == 'json':
            job = json_to_obj(job)
        elif qkw['serializer'] == 'pickle':
            job = pickle_to_obj(job)
        data = job.pop('data', [])
        job = job.pop('kw', {})

        # the payload has been consumed; remove it from redis.
        rds.delete(k)

        # chunk the list so each worker receives a batch.
        chunked_data = util.chunk_list(data, qkw.get('chunk_size'))

        # bind the shared kwargs into a partial function.
        load_fx = partial(ingest.source, **job)

        # pooled execution; iterate to exhaustion to force completion.
        pool = Pool(qkw.get('max_workers', MAX_WORKERS))
        for res in pool.imap_unordered(load_fx, chunked_data):
            pass
        return True

    # BUG FIX: JobTimeoutException subclasses Exception, so it must be
    # caught *before* the generic handler — previously this branch was
    # unreachable and timeouts surfaced as RequestError.
    except JobTimeoutException:
        end = time.time()
        raise InternalServerError(
            'Bulk loading timed out after {} seconds'.format(end - start))

    except Exception:
        tb = format_exc()
        # also fixes the "Ocurred" typo in the user-facing message.
        raise RequestError('An Error Occurred while running {}:\n{}'.format(
            job_id, tb))
def to_dict(self):
    """
    Serialize this recipe as a plain dictionary.
    """
    serialized = dict(
        id=self.id,
        org_id=self.org_id,
        sous_chef=self.sous_chef.slug,
        name=self.name,
        slug=self.slug,
        description=self.description,
        created=self.created,
        updated=self.updated,
        last_run=self.last_run,
        schedule_by=self.schedule_by,
        crontab=self.crontab,
        time_of_day=self.time_of_day,
        minutes=self.minutes,
        status=self.status,
        traceback=self.traceback,
        last_job=self.last_job,
        # options are stored pickled; decode before returning.
        options=pickle_to_obj(self.options),
    )
    # metric-producing sous chefs also expose their metrics.
    if self.sous_chef.creates == 'metrics':
        serialized['metrics'] = self.metrics
    return serialized
def load_all(self, kwargs_key):
    """
    Do the work.

    Fetches pickled ``{'data': [...], 'kw': {...}}`` inputs from redis
    under ``kwargs_key``, loads each item (optionally concurrently),
    then persists the results via a task-local session.

    :param kwargs_key: redis key holding the pickled inputs.
    :return: True on success; an ``InternalServerError`` instance on
        timeout (NOTE(review): returned, not raised — confirm callers
        expect this).
    :raises InternalServerError: when the inputs are missing from redis.
    """
    start = time.time()
    try:
        # create a session specific to this task
        session = gen_session()

        # get the inputs from redis
        kwargs = self.redis.get(kwargs_key)
        if not kwargs:
            raise InternalServerError(
                'An unexpected error occurred while processing bulk upload.'
            )
        kwargs = pickle_to_obj(kwargs)
        data = kwargs.get('data')
        kw = kwargs.get('kw')

        # delete them
        self.redis.delete(kwargs_key)

        outputs = []
        errors = []
        fx = partial(self._load_one, **kw)
        if self.concurrent:
            pool = Pool(min([len(data), self.max_workers]))
            for res in pool.imap_unordered(fx, data):
                if isinstance(res, Exception):
                    errors.append(res)
                else:
                    outputs.append(res)
        else:
            for item in data:
                res = fx(item)
                if isinstance(res, Exception):
                    errors.append(res)
                else:
                    outputs.append(res)

        # report errors
        if len(errors):
            self._handle_errors(errors)

        # add objects and execute
        if self.returns == 'model':
            for o in outputs:
                if o is not None:
                    try:
                        session.add(o)
                        # BUG FIX: Session.commit() takes no arguments;
                        # passing the object raised a TypeError.
                        session.commit()
                    except Exception as e:
                        self._handle_errors(e)

        # union all queries
        elif self.returns == 'query':
            for query in outputs:
                if query is not None:
                    try:
                        session.execute(query)
                    except Exception as e:
                        self._handle_errors(e)
            try:
                session.commit()
            except Exception as e:
                session.rollback()
                session.remove()
                self._handle_errors(e)

        # return true if everything worked.
        session.close()
        return True

    except JobTimeoutException:
        end = time.time()
        return InternalServerError(
            'Bulk loading timed out after {} seconds'
            .format(end - start))
def run(sous_chef_path, recipe_id, kw_key, **kw):
    """
    Do the work. This exists outside the class
    in order to enable pickling for the task queue.

    :param sous_chef_path: import path of the SousChef class to run.
    :param recipe_id: primary key of the Recipe to execute.
    :param kw_key: optional redis key holding pickled kwargs; when set
        it replaces ``**kw`` entirely.
    :return: the cooked data in passthrough mode, otherwise True; a
        ``MerlynneError`` instance on non-passthrough failure.
    :raises MerlynneError: on failure in passthrough mode.
    """
    recipe = db.session.query(Recipe).get(recipe_id)
    try:
        if kw_key:
            # load in kwargs
            kw = rds.get(kw_key)
            if not kw:
                raise InternalServerError(
                    'An unexpected error occurred while attempting to run a Sous Chef.'
                )
            kw = pickle_to_obj(kw)
            # delete them.
            rds.delete(kw_key)

        # import sous chef
        SousChef = sc_exec.from_import_path(sous_chef_path)

        # initialize it with kwargs
        kw['org'] = db.session\
            .query(Org).get(recipe.org.id)\
            .to_dict(incl_domains=True)
        kw['recipe'] = recipe.to_dict()
        sous_chef = SousChef(**kw)

        # indicate that the job is running
        if not kw.get('passthrough', False):
            recipe.status = 'running'
            db.session.add(recipe)
            db.session.commit()

        # cook it.
        data = sous_chef.cook()

        # passthrough the data.
        if kw.get('passthrough', False):
            return data

        # otherwise just exhaust the generator
        if isgenerator(data):
            data = list(data)

        # teardown this recipe
        sous_chef.teardown()

        # update status and next job from sous chef.
        recipe.status = "stable"
        recipe.traceback = None
        recipe.last_run = dates.now()
        if len(sous_chef.next_job.keys()):
            recipe.last_job = sous_chef.next_job
        db.session.add(recipe)
        db.session.commit()
        return True

    # BUG FIX: a bare ``except:`` also swallowed SystemExit and
    # KeyboardInterrupt; catch Exception instead.
    except Exception:
        # always delete the kwargs.
        if kw_key:
            rds.delete(kw_key)
        # BUG FIX: ``kw`` may be None or raw bytes here if the redis
        # payload was missing or failed to unpickle — calling .get()
        # directly would mask the original error.
        passthrough = isinstance(kw, dict) and kw.get('passthrough', False)
        if not passthrough:
            db.session.rollback()
            recipe.status = "error"
            recipe.traceback = format_exc()
            recipe.last_run = dates.now()
            db.session.add(recipe)
            db.session.commit()

            # notification
            tb = format_exc()
            error_notification(recipe, tb)
            return MerlynneError(tb)
        raise MerlynneError(format_exc())
def load_all(self, kwargs_key):
    """
    Do the work.

    Fetches pickled ``{'data': [...], 'kw': {...}}`` inputs from redis
    under ``kwargs_key``, loads each item (optionally concurrently),
    then persists the results via a task-local session.

    :param kwargs_key: redis key holding the pickled inputs.
    :return: True on success; an ``InternalServerError`` instance on
        timeout (NOTE(review): returned, not raised — confirm callers
        expect this).
    :raises InternalServerError: when the inputs are missing from redis.
    """
    start = time.time()
    try:
        # create a session specific to this task
        session = gen_session()

        # get the inputs from redis
        kwargs = self.redis.get(kwargs_key)
        if not kwargs:
            raise InternalServerError(
                'An unexpected error occurred while processing bulk upload.'
            )
        kwargs = pickle_to_obj(kwargs)
        data = kwargs.get('data')
        kw = kwargs.get('kw')

        # delete them
        self.redis.delete(kwargs_key)

        outputs = []
        errors = []
        fx = partial(self._load_one, **kw)
        if self.concurrent:
            pool = Pool(min([len(data), self.max_workers]))
            for res in pool.imap_unordered(fx, data):
                if isinstance(res, Exception):
                    errors.append(res)
                else:
                    outputs.append(res)
        else:
            for item in data:
                res = fx(item)
                if isinstance(res, Exception):
                    errors.append(res)
                else:
                    outputs.append(res)

        # report errors
        if len(errors):
            self._handle_errors(errors)

        # add objects and execute
        if self.returns == 'model':
            for o in outputs:
                if o is not None:
                    try:
                        session.add(o)
                        # BUG FIX: Session.commit() takes no arguments;
                        # passing the object raised a TypeError.
                        session.commit()
                    except Exception as e:
                        self._handle_errors(e)

        # union all queries
        elif self.returns == 'query':
            for query in outputs:
                if query is not None:
                    try:
                        session.execute(query)
                    except Exception as e:
                        self._handle_errors(e)
            try:
                session.commit()
            except Exception as e:
                session.rollback()
                session.remove()
                self._handle_errors(e)

        # return true if everything worked.
        session.close()
        return True

    except JobTimeoutException:
        end = time.time()
        return InternalServerError(
            'Bulk loading timed out after {} seconds'.format(end - start))
def deserialize(self, s):
    """
    Deserialize a raw payload returned from redis back into a
    python object via unpickling.
    """
    obj = pickle_to_obj(s)
    return obj
def run_sous_chef(sous_chef_path, recipe_id, kw_key):
    """
    Do the work. This exists outside the class in order to enable pickling.

    :param sous_chef_path: import path of the SousChef class to run.
    :param recipe_id: primary key of the Recipe to execute.
    :param kw_key: redis key holding the pickled kwargs for the run.
    :return: the cooked data in passthrough mode, otherwise True; a
        ``MerlynneError`` instance on non-passthrough failure.
    :raises MerlynneError: on failure in passthrough mode.
    """
    recipe = db.session.query(Recipe).get(recipe_id)
    # defined up-front so the error handler can inspect it safely even
    # when rds.get itself raises.
    kw = None
    try:
        # load in kwargs
        kw = rds.get(kw_key)
        if not kw:
            raise InternalServerError(
                "An unexpected error occurred while attempting to run a Sous Chef.")
        kw = pickle_to_obj(kw)
        # delete them.
        rds.delete(kw_key)
        # import sous chef
        SousChef = import_sous_chef(sous_chef_path)
        # initialize it with kwargs
        sc = SousChef(**kw)
        # indicate that the job is running
        if not kw.get("passthrough", False):
            recipe.status = "running"
            db.session.add(recipe)
            db.session.commit()
        # cook it.
        data = sc.cook()
        # passthrough the data.
        if kw.get("passthrough", False):
            return data
        # otherwise just exhaust the generator
        if isgenerator(data):
            data = list(data)
        # teardown this recipe
        sc.teardown()
        # update status and next job from sous chef.
        recipe.status = "stable"
        recipe.traceback = None
        recipe.last_run = dates.now()
        if len(sc.next_job.keys()):
            recipe.last_job = sc.next_job
        db.session.add(recipe)
        db.session.commit()
        return True
    except Exception as e:
        # always delete the kwargs.
        rds.delete(kw_key)
        # BUG FIX: ``kw`` may be None or raw bytes when the redis fetch
        # or unpickle failed; calling .get() on it directly would raise
        # and mask the original error.
        if isinstance(kw, dict) and kw.get("passthrough", False):
            raise MerlynneError(e)
        db.session.rollback()
        recipe.status = "error"
        recipe.traceback = format_exc()
        recipe.last_run = dates.now()
        db.session.add(recipe)
        db.session.commit()
        return MerlynneError(e)