return tree if __name__ == '__main__': from graph import Graph, Node, Arc g = Graph() g.add_node(Node('A')) g.add_node(Node('B')) g.add_node(Node('C')) g.add_node(Node('D')) g.add_node(Node('E')) g.add_node(Node('F')) g.add_arc(Arc('A','B',1)) g.add_arc(Arc('B','A',1)) g.add_arc(Arc('A','E',3)) g.add_arc(Arc('A','D',4)) g.add_arc(Arc('B','D',4)) g.add_arc(Arc('B','E',2)) g.add_arc(Arc('C','E',4)) g.add_arc(Arc('C','F',5)) g.add_arc(Arc('D','A',4)) g.add_arc(Arc('D','B',4)) g.add_arc(Arc('D','E',4)) g.add_arc(Arc('E','A',3)) g.add_arc(Arc('E','B',2)) g.add_arc(Arc('E','C',4)) g.add_arc(Arc('E','D',4)) g.add_arc(Arc('E','F',7))
class Pipeline:
    """Builds a DAG of command-line jobs and hands it to an execution engine.

    Jobs are registered with `add_job` and stored in ``self.jobs`` under BOTH
    their integer id and their name (so a name that collides with an existing
    entry silently overwrites it — callers should use unique names).
    Dependency arcs in ``self.job_graph`` run parent -> child.
    """

    def __init__(self, verbose=0):
        self.job_counter = 0        # next integer job id to hand out
        self.jobs = {}              # maps int id AND str name -> Job
        self.job_graph = Graph()    # dependency DAG of Job nodes
        self.previous_job_id = -1   # id of most recently added job; -1 = none yet
        self.verbose = verbose

    def add_job(self, command, args=(), name=None, dependencies=None,
                input_files=None, output_files=None, depend_last_job=True,
                force_out_of_date=False):
        """Register a new job and wire up its dependency arcs.

        command        : command template, formatted as ``command % args``
        dependencies   : iterable of job ids/names this job must run after;
                         if None and depend_last_job is True, the job is
                         chained after the previously added job
        Returns the new job's integer id.
        Raises DependencyNotFoundError for an unknown dependency key.
        """
        # BUGFIX: input_files/output_files used mutable default arguments
        # ([]), which are shared across all calls; use a None sentinel.
        if input_files is None:
            input_files = []
        if output_files is None:
            output_files = []
        if name is None:
            name = "Job " + str(self.job_counter)
        new_job = Job(command=command % args,
                      name=name,
                      file_inputs=input_files,
                      file_outputs=output_files,
                      force_out_of_date=force_out_of_date)
        # Store new job both by ID and by name
        self.jobs[self.job_counter] = new_job
        self.jobs[name] = new_job
        self.job_graph.add_node(new_job)
        if dependencies:
            for dependency in dependencies:
                if dependency in self.jobs:
                    job_parent = self.jobs[dependency]
                    self.job_graph.add_arc(job_parent, new_job)
                    if self.verbose > 0:
                        print("*** Dependency registered: " + str(job_parent) + " -> " + str(new_job))
                else:
                    raise DependencyNotFoundError("Specified dependency '" + str(dependency) + "' is not in pipeline job list")
        # No explicit dependencies: optionally chain after the previous job.
        if self.previous_job_id >= 0 and depend_last_job and dependencies is None:
            self.job_graph.add_arc(self.jobs[self.previous_job_id], new_job)
        self.previous_job_id = self.job_counter
        self.job_counter += 1
        return self.previous_job_id

    def execute(self, engine=None):
        """Run this pipeline's job graph on *engine*.

        When *engine* is None the best available engine is created fresh.
        Returns whatever ``engine.execute()`` returns.
        """
        # BUGFIX: the old default `engine=ExecutionEngineFactory.get_best_engine()`
        # was evaluated ONCE at class-definition time, so every defaulted call
        # shared (and re-used) a single engine instance. Create it lazily.
        if engine is None:
            engine = ExecutionEngineFactory.get_best_engine()
        config = get_configuration()
        log_setting = config.get("logging", None)
        if log_setting == "stderr":
            log_to_stderr()
        engine.set_graph(self.job_graph)
        return engine.execute()

    @staticmethod
    def run(engine=None, max_parallel=None, max_parallel_buffer=0,
            terminate_on_nonzero_exit=True, stop_all_on_failure=True):
        """Configure *engine* and execute the default Pipeline instance.

        max_parallel / max_parallel_buffer : engine concurrency limits
        Returns the result of the default instance's `execute`.
        """
        # BUGFIX: same definition-time default-evaluation bug as `execute`.
        if engine is None:
            engine = ExecutionEngineFactory.get_best_engine()
        if max_parallel is not None:
            engine.set_max_parallel(max_parallel, max_parallel_buffer)
        elif max_parallel_buffer > 0:
            engine.set_max_parallel_buffer(max_parallel_buffer)
        engine.terminate_on_nonzero_exit = terminate_on_nonzero_exit
        engine.stop_all_on_failure = stop_all_on_failure
        engine.set_graph(Pipeline.get_default_instance().job_graph)
        return Pipeline.get_default_instance().execute(engine)

    @staticmethod
    def get_default_instance():
        """Return the process-wide singleton Pipeline, creating it on first use."""
        if not hasattr(Pipeline, "default_pipeline"):
            Pipeline.default_pipeline = Pipeline()
        return Pipeline.default_pipeline