def _run_get_new_deps(self):
    """
    Run the task and report dynamic requirements that are not complete yet.

    ``self.tracking_url_callback`` and ``self.status_message_callback`` are exposed on the task as
    ``set_tracking_url`` and ``set_status_message`` for as long as task code executes. The body of a
    generator-style ``run()`` only executes while the generator is being advanced, so the callbacks
    stay installed during that loop as well. They are always reset to ``None`` before returning,
    including when the task raises.

    Returns ``None`` when ``run()`` does not return a generator or when the generator finishes.
    Otherwise returns a list of ``(task_module, task_family, to_str_params())`` tuples describing the
    first yielded batch of requirements in which at least one task is not complete.
    """
    self.task.set_tracking_url = self.tracking_url_callback
    self.task.set_status_message = self.status_message_callback
    try:
        task_gen = self.task.run()
        if not isinstance(task_gen, types.GeneratorType):
            return None

        next_send = None
        while True:
            try:
                # send(None) behaves exactly like next(), so one call both starts the generator and
                # later hands it the outputs of the requirements it yielded.
                requires = task_gen.send(next_send)
            except StopIteration:
                return None

            new_req = flatten(requires)
            if not all(t.complete() for t in new_req):
                return [(t.task_module, t.task_family, t.to_str_params()) for t in new_req]
            next_send = getpaths(requires)
    finally:
        # Reset on every exit path so a failing task never keeps stale callbacks.
        self.task.set_tracking_url = None
        self.task.set_status_message = None
def _run_get_new_deps(self):
    """
    Run the task and report dynamic requirements that are not complete yet.

    The status reporter's ``update_tracking_url``, ``update_status`` and ``update_progress_percentage``
    are exposed on the task as ``set_tracking_url``, ``set_status_message`` and
    ``set_progress_percentage``. They remain installed while a generator-style ``run()`` is being
    advanced (its body does not execute before that) and are reset to ``None`` on every exit path,
    including when the task raises.

    Returns ``None`` when ``run()`` does not return a generator or when the generator finishes.
    Otherwise returns a list of ``(task_module, task_family, to_str_params())`` tuples describing the
    first yielded batch of requirements in which at least one task is not complete.
    """
    reporter = self.status_reporter
    self.task.set_tracking_url = reporter.update_tracking_url
    self.task.set_status_message = reporter.update_status
    self.task.set_progress_percentage = reporter.update_progress_percentage
    try:
        task_gen = self.task.run()
        if not isinstance(task_gen, types.GeneratorType):
            return None

        next_send = None
        while True:
            try:
                # send(None) is equivalent to next(); later iterations pass the outputs of the
                # requirements yielded previously.
                requires = task_gen.send(next_send)
            except StopIteration:
                return None

            new_req = flatten(requires)
            if not all(t.complete() for t in new_req):
                return [(t.task_module, t.task_family, t.to_str_params()) for t in new_req]
            next_send = getpaths(requires)
    finally:
        self.task.set_tracking_url = None
        self.task.set_status_message = None
        self.task.set_progress_percentage = None
def _run_get_new_deps(self):
    """
    Run the task and report dynamic requirements that are not complete yet.

    ``self.forward_reporter_callbacks`` maps attribute names of ``self.status_reporter`` to attribute
    names on the task. Each mapped reporter attribute is installed on the task before it runs and stays
    installed while a generator-style ``run()`` is being advanced (its body does not execute before
    that). All mapped task attributes are reset to ``None`` on every exit path, including when the task
    raises.

    Returns ``None`` when ``run()`` does not return a generator or when the generator finishes.
    Otherwise returns a list of ``(task_module, task_family, to_str_params())`` tuples describing the
    first yielded batch of requirements in which at least one task is not complete.
    """
    callbacks = self.forward_reporter_callbacks
    try:
        # set task callbacks before running
        for reporter_attr, task_attr in callbacks.items():
            setattr(self.task, task_attr, getattr(self.status_reporter, reporter_attr))

        task_gen = self.task.run()
        if not isinstance(task_gen, types.GeneratorType):
            return None

        next_send = None
        while True:
            try:
                # send(None) is equivalent to next(); later iterations pass the outputs of the
                # requirements yielded previously.
                requires = task_gen.send(next_send)
            except StopIteration:
                return None

            new_req = flatten(requires)
            if not all(t.complete() for t in new_req):
                return [(t.task_module, t.task_family, t.to_str_params()) for t in new_req]
            next_send = getpaths(requires)
    finally:
        # reset task callbacks, whatever happened above
        for task_attr in callbacks.values():
            setattr(self.task, task_attr, None)
def _run_get_new_deps(self):
    """
    Run the task and report dynamic requirements that are not complete yet.

    ``run()`` is first called with ``tracking_url_callback=self.tracking_url_callback``. If that call
    fails with a ``TypeError`` whose text contains 'unexpected keyword argument', ``run()`` is called
    again without arguments; any other ``TypeError`` is re-raised unchanged.

    Returns ``None`` when ``run()`` does not return a generator or when the generator finishes.
    Otherwise returns a list of ``(task_module, task_family, to_str_params())`` tuples describing the
    first yielded batch of requirements in which at least one task is not complete.
    """
    run_again = False
    try:
        task_gen = self.task.run(tracking_url_callback=self.tracking_url_callback)
    except TypeError as ex:
        # str(ex) works for any TypeError; indexing ex.args fails when the exception has no
        # arguments and the substring test fails when the first argument is not a string.
        if 'unexpected keyword argument' not in str(ex):
            raise
        run_again = True
    if run_again:
        # Retried outside the handler so a failure here is not chained to the TypeError above.
        task_gen = self.task.run()

    if not isinstance(task_gen, types.GeneratorType):
        return None

    next_send = None
    while True:
        try:
            # send(None) is equivalent to next(); later iterations pass the outputs of the
            # requirements yielded previously.
            requires = task_gen.send(next_send)
        except StopIteration:
            return None

        new_req = flatten(requires)
        if not all(t.complete() for t in new_req):
            # Only build the description when it is actually returned.
            return [(t.task_module, t.task_family, t.to_str_params()) for t in new_req]
        next_send = getpaths(requires)
def _run_get_new_deps(self):
    """
    Run the task and report dynamic requirements that are not complete yet.

    ``run()`` is first called with ``tracking_url_callback=self.tracking_url_callback``. If that call
    fails with a ``TypeError`` whose text contains 'unexpected keyword argument', ``run()`` is called
    again without arguments; any other ``TypeError`` is re-raised unchanged.

    Returns ``None`` when ``run()`` does not return a generator or when the generator finishes.
    Otherwise returns a list of ``(task_module, task_family, to_str_params())`` tuples describing the
    first yielded batch of requirements in which at least one task is not complete.
    """
    run_again = False
    try:
        task_gen = self.task.run(tracking_url_callback=self.tracking_url_callback)
    except TypeError as ex:
        # Inspect the text via str(ex): ex.args may be empty or hold a non-string value, in which
        # case indexing it or running a substring test on it would raise a different error.
        if 'unexpected keyword argument' not in str(ex):
            raise
        run_again = True
    if run_again:
        task_gen = self.task.run()

    if not isinstance(task_gen, types.GeneratorType):
        return None

    next_send = None
    while True:
        try:
            # send(None) is equivalent to next(); later iterations pass the outputs of the
            # requirements yielded previously.
            requires = task_gen.send(next_send)
        except StopIteration:
            return None

        new_req = flatten(requires)
        if all(t.complete() for t in new_req):
            next_send = getpaths(requires)
            continue
        # Built lazily: the description is only needed when a requirement is incomplete.
        return [(t.task_module, t.task_family, t.to_str_params()) for t in new_req]
def _run_get_new_deps(self):
    """
    Call the task's ``run()``, fire PROCESSING_TIME, then walk any yielded requirements.

    The PROCESSING_TIME event is triggered in a ``finally`` clause with the wall-clock time spent in
    the ``run()`` call itself, whether or not that call raised.

    Returns ``None`` when ``run()`` does not return a generator or when the generator finishes.
    Otherwise returns a list of ``(task_module, task_family, to_str_params())`` tuples for the first
    yielded batch that contains a task which is not complete.
    """
    try:
        t0 = time.time()
        task_gen = self.task.run()
    finally:
        self.task.trigger_event(Event.PROCESSING_TIME, self.task, time.time() - t0)

    if not isinstance(task_gen, types.GeneratorType):
        return None

    to_send = None
    while True:
        try:
            # The first step (and any step with nothing to send) simply advances the generator.
            requires = six.next(task_gen) if to_send is None else task_gen.send(to_send)
        except StopIteration:
            return None

        yielded = flatten(requires)
        new_deps = [(dep.task_module, dep.task_family, dep.to_str_params()) for dep in yielded]
        if not all(dep.complete() for dep in yielded):
            return new_deps
        to_send = getpaths(requires)
def output(self):
    """
    Return ``getpaths`` of the tasks yielded by ``run()``, or of an empty list.

    ``run()`` is only consumed when every flattened requirement reports ``complete()``. An ordinary
    exception raised while consuming it is logged and otherwise ignored, in which case the result is
    computed from an empty task list. ``KeyboardInterrupt`` and ``SystemExit`` are not swallowed.
    """
    tasks = []
    if all(req.complete() for req in flatten(self.requires())):
        try:
            tasks = list(self.run())
        except Exception:
            # Best-effort on purpose, but limited to Exception so that interpreter-exit and
            # Ctrl-C signals still propagate.
            logger.exception('%s failed at run() step; the exception will not be raised because Luigi is still building the graph.', repr(self))

    # FIXME: conserve task structure: the generator actually create an
    # implicit array level even if a single task is yielded.
    # For now, we just handle the special singleton case.
    if len(tasks) == 1:
        tasks = tasks[0]
    return getpaths(tasks)
def _run_get_new_deps(self):
    """
    Run the task and report dynamic requirements that are not complete yet.

    ``self.tracking_url_callback`` and ``self.status_message_callback`` are exposed on the task as
    ``set_tracking_url`` and ``set_status_message``. They stay installed while a generator-style
    ``run()`` is being advanced (its body does not execute before that) and are reset to ``None`` on
    every exit path, including when the task raises.

    For backward compatibility ``run()`` is first called with a ``tracking_url_callback`` keyword
    whose use emits a ``DeprecationWarning``. If the call fails with a ``TypeError`` mentioning
    'unexpected keyword argument', ``run()`` is called again without arguments.

    Returns ``None`` when ``run()`` does not return a generator or when the generator finishes.
    Otherwise returns a list of ``(task_module, task_family, to_str_params())`` tuples describing the
    first yielded batch of requirements in which at least one task is not complete.
    """
    def deprecated_tracking_url_callback(*args, **kwargs):
        # stacklevel=2 attributes the warning to the task code that called the callback.
        warnings.warn(
            "tracking_url_callback in run() args is deprecated, use "
            "set_tracking_url instead.", DeprecationWarning, stacklevel=2)
        self.tracking_url_callback(*args, **kwargs)

    self.task.set_tracking_url = self.tracking_url_callback
    self.task.set_status_message = self.status_message_callback
    try:
        run_again = False
        try:
            task_gen = self.task.run(
                tracking_url_callback=deprecated_tracking_url_callback)
        except TypeError as ex:
            if 'unexpected keyword argument' not in str(ex):
                raise
            run_again = True
        if run_again:
            task_gen = self.task.run()

        if not isinstance(task_gen, types.GeneratorType):
            return None

        next_send = None
        while True:
            try:
                # send(None) is equivalent to next(); later iterations pass the outputs of the
                # requirements yielded previously.
                requires = task_gen.send(next_send)
            except StopIteration:
                return None

            new_req = flatten(requires)
            if not all(t.complete() for t in new_req):
                return [(t.task_module, t.task_family, t.to_str_params())
                        for t in new_req]
            next_send = getpaths(requires)
    finally:
        self.task.set_tracking_url = None
        self.task.set_status_message = None
def _run_get_new_deps(self):
    """
    Run the task and report dynamic requirements that are not complete yet.

    ``self.tracking_url_callback`` and ``self.status_message_callback`` are exposed on the task as
    ``set_tracking_url`` and ``set_status_message``. They stay installed while a generator-style
    ``run()`` is being advanced (its body does not execute before that) and are reset to ``None`` on
    every exit path, including when the task raises.

    For backward compatibility ``run()`` is first called with a ``tracking_url_callback`` keyword
    whose use emits a ``DeprecationWarning``. If the call fails with a ``TypeError`` mentioning
    'unexpected keyword argument', ``run()`` is called again without arguments.

    Returns ``None`` when ``run()`` does not return a generator or when the generator finishes.
    Otherwise returns a list of ``(task_module, task_family, to_str_params())`` tuples describing the
    first yielded batch of requirements in which at least one task is not complete.
    """
    def deprecated_tracking_url_callback(*args, **kwargs):
        # stacklevel=2 makes the warning point at the task code that used the deprecated argument.
        warnings.warn("tracking_url_callback in run() args is deprecated, use "
                      "set_tracking_url instead.", DeprecationWarning, stacklevel=2)
        self.tracking_url_callback(*args, **kwargs)

    self.task.set_tracking_url = self.tracking_url_callback
    self.task.set_status_message = self.status_message_callback
    try:
        run_again = False
        try:
            task_gen = self.task.run(tracking_url_callback=deprecated_tracking_url_callback)
        except TypeError as ex:
            if 'unexpected keyword argument' not in str(ex):
                raise
            run_again = True
        if run_again:
            task_gen = self.task.run()

        if not isinstance(task_gen, types.GeneratorType):
            return None

        next_send = None
        while True:
            try:
                # send(None) is equivalent to next(); later iterations pass the outputs of the
                # requirements yielded previously.
                requires = task_gen.send(next_send)
            except StopIteration:
                return None

            new_req = flatten(requires)
            if all(t.complete() for t in new_req):
                next_send = getpaths(requires)
                continue
            return [(t.task_module, t.task_family, t.to_str_params()) for t in new_req]
    finally:
        # Always undo the callback wiring, even if the task failed.
        self.task.set_tracking_url = None
        self.task.set_status_message = None
def convert_tasks_to_manifest_if_necessary(input_tasks):  # pylint: disable=invalid-name
    """
    Swap the input tasks for a single manifest task when they produce many targets.

    The "threshold" option is read from the CONFIG_SECTION configuration section, defaulting to -1.
    When the threshold is positive and the number of flattened targets is greater than or equal to it,
    a one-element list holding a URLManifestTask built from the target paths is returned. Otherwise the
    flattened input tasks are returned.
    """
    all_input_tasks = task.flatten(input_tasks)
    targets = task.flatten(task.getpaths(all_input_tasks))
    threshold = configuration.get_config().getint(CONFIG_SECTION, 'threshold', -1)
    target_count = len(targets)

    # Guard clause: a non-positive threshold disables the manifest entirely.
    if threshold <= 0 or target_count < threshold:
        log.debug(
            'Directly processing files since %d inputs are less than the threshold %d',
            target_count, threshold
        )
        return all_input_tasks

    log.debug(
        'Using manifest since %d inputs are greater than or equal to the threshold %d',
        target_count, threshold
    )
    return [URLManifestTask(urls=[target.path for target in targets])]
def convert_tasks_to_manifest_if_necessary(input_tasks):  # pylint: disable=invalid-name
    """
    Return either the flattened input tasks or a manifest task standing in for them.

    A manifest is used when the "threshold" option of the CONFIG_SECTION configuration section
    (default -1) is positive and the flattened targets of the input tasks number at least that many.
    In that case the result is ``[URLManifestTask(urls=...)]`` with one URL per target path; in every
    other case the flattened input tasks are returned as they are.
    """
    flat_tasks = task.flatten(input_tasks)
    targets = task.flatten(task.getpaths(flat_tasks))
    threshold = configuration.get_config().getint(CONFIG_SECTION, 'threshold', -1)

    use_manifest = threshold > 0 and len(targets) >= threshold
    if use_manifest:
        log.debug(
            'Using manifest since %d inputs are greater than or equal to the threshold %d',
            len(targets), threshold)
        manifest_urls = [target.path for target in targets]
        return [URLManifestTask(urls=manifest_urls)]

    log.debug(
        'Directly processing files since %d inputs are less than the threshold %d',
        len(targets), threshold)
    return flat_tasks
def _run_get_new_deps(self):
    """
    Call the task's ``run()`` and walk any requirements it yields.

    Returns ``None`` when ``run()`` does not return a generator or when the generator finishes.
    Otherwise returns a list of ``(task_module, task_family, to_str_params())`` tuples for the first
    yielded batch that contains a task which is not complete.
    """
    task_gen = self.task.run()
    if not isinstance(task_gen, types.GeneratorType):
        return None

    to_send = None
    while True:
        try:
            # The first step (and any step with nothing to send) simply advances the generator.
            requires = six.next(task_gen) if to_send is None else task_gen.send(to_send)
        except StopIteration:
            return None

        yielded = flatten(requires)
        if not all(dep.complete() for dep in yielded):
            return [(dep.task_module, dep.task_family, dep.to_str_params()) for dep in yielded]
        to_send = getpaths(requires)
def output(self):
    """Return ``getpaths`` applied to whatever ``requires()`` returns."""
    requirements = self.requires()
    return getpaths(requirements)
def run(self):
    """
    Run ``self.task`` once and report the outcome on ``self.result_queue``.

    Every exit path puts a ``(task_id, status, error_message, missing, new_deps)`` tuple on the result
    queue from the outer ``finally`` clause. In addition, each batch of dynamic requirements that is
    already complete is reported with status RUNNING while the task's generator is being advanced.
    ``KeyboardInterrupt`` is re-raised; any other exception is logged, passed to the task's
    ``on_failure`` and sent through ``notifications.send_error_email``.
    """
    logger.info('[pid %s] Worker %s running %s', os.getpid(), self.worker_id, self.task.task_id)
    if self.random_seed:
        # Need to have different random seeds if running in separate processes
        random.seed((os.getpid(), time.time()))

    # Defaults reported by the outer ``finally`` if nothing below overrides them.
    status = FAILED
    error_message = ''
    missing = []
    new_deps = []
    try:
        # Verify that all the tasks are fulfilled!
        missing = [dep.task_id for dep in self.task.deps() if not dep.complete()]
        if missing:
            deps = 'dependency' if len(missing) == 1 else 'dependencies'
            raise RuntimeError('Unfulfilled %s at run time: %s' % (deps, ', '.join(missing)))
        self.task.trigger_event(Event.START, self.task)
        t0 = time.time()
        status = None
        try:
            task_gen = self.task.run()
            if isinstance(task_gen, types.GeneratorType):  # new deps
                next_send = None
                while True:
                    try:
                        # Advance the generator, sending back the value prepared on the previous
                        # iteration when there is one.
                        if next_send is None:
                            requires = six.next(task_gen)
                        else:
                            requires = task_gen.send(next_send)
                    except StopIteration:
                        break
                    new_req = flatten(requires)
                    # RUNNING when every yielded task is complete, SUSPENDED otherwise.
                    status = (RUNNING if all(t.complete() for t in new_req) else SUSPENDED)
                    new_deps = [(t.task_module, t.task_family, t.to_str_params()) for t in new_req]
                    if status == RUNNING:
                        # Report the satisfied batch, then resume the generator with
                        # getpaths(requires) and start over with an empty new_deps list.
                        self.result_queue.put((self.task.task_id, status, '', missing, new_deps))
                        next_send = getpaths(requires)
                        new_deps = []
                    else:
                        logger.info('[pid %s] Worker %s new requirements %s', os.getpid(), self.worker_id, self.task.task_id)
                        # Both ``finally`` clauses still run on this return: the outer one reports
                        # SUSPENDED together with the pending new_deps.
                        return
        finally:
            # PROCESSING_TIME is skipped only when the task was suspended.
            if status != SUSPENDED:
                self.task.trigger_event(Event.PROCESSING_TIME, self.task, time.time() - t0)
        # Success path: the JSON-encoded result of on_success() is reported as error_message.
        error_message = json.dumps(self.task.on_success())
        logger.info('[pid %s] Worker %s done %s', os.getpid(), self.worker_id, self.task.task_id)
        self.task.trigger_event(Event.SUCCESS, self.task)
        status = DONE
    except KeyboardInterrupt:
        raise
    except BaseException as ex:
        status = FAILED
        logger.exception("[pid %s] Worker %s failed %s", os.getpid(), self.worker_id, self.task)
        error_message = notifications.wrap_traceback(self.task.on_failure(ex))
        self.task.trigger_event(Event.FAILURE, self.task, ex)
        subject = "Luigi: %s FAILED" % self.task
        notifications.send_error_email(subject, error_message)
    finally:
        # Runs on every exit path, including the early return and a re-raised KeyboardInterrupt.
        self.result_queue.put((self.task.task_id, status, error_message, missing, new_deps))