def emit(self, data_bundle):
    """Put a copy of ``data_bundle`` onto this edge's queue.

    The bundle is shallow-copied with ``copy.copy`` and enqueued as a
    ``(bundle_copy, self._from_node)`` tuple; the ``put`` call is issued
    with ``block=True``.

    Args:
        data_bundle: The ``DataBundle`` to send. Any other type fails the
            ``isinstance`` assertion.
    """
    assert isinstance(data_bundle, DataBundle)

    # Enqueue a shallow copy rather than the caller's own object, tagged
    # with the node the data comes from.
    queued_item = (copy.copy(data_bundle), self._from_node)
    self._queue.put(queued_item, block=True)

    # NOTE(review): the log fields read ``from_node``/``to_node`` while the
    # payload uses ``_from_node`` -- confirm both refer to the same node.
    logger.debug(
        "emit data in edge",
        from_node=self.from_node,
        to_node=self.to_node,
        data_bundle=data_bundle,
    )
def add_edge(self, from_task, to_task):
    """Connect ``from_task`` to ``to_task`` with a new edge.

    Both tasks are added to ``self._task_set``. A ``GraphEdge`` built from
    the two tasks' graph nodes is then added to the source node's
    ``out_edges`` and to the target node's ``in_edges``.

    Returns:
        ``self``, so that calls can be chained.
    """
    for endpoint in (from_task, to_task):
        self._task_set.add(endpoint)

    source_node = from_task.graph_node
    target_node = to_task.graph_node
    new_edge = GraphEdge(source_node, target_node)

    # The same edge object is shared by both endpoints.
    source_node.out_edges.add(new_edge)
    target_node.in_edges.add(new_edge)

    logger.debug(
        "add new edge",
        from_task=str(from_task),
        to_task=str(to_task),
    )
    return self
def _execute(self, data_bundle):
    """Normalize the incoming code string and emit it downstream.

    ``data_bundle["code"]`` is split into lines, passed through
    ``string_util.left_padding_strings`` and re-joined with newlines. The
    result is emitted in a new ``DataBundle`` holding ``"code"`` and
    ``"filepath"`` (``"N/A"`` when the input has no file path).

    Emitting is best effort: an ordinary exception raised while building
    or emitting the bundle is logged and swallowed. Errors raised during
    preprocessing (for example a missing ``"code"`` entry) propagate to
    the caller.

    Args:
        data_bundle: Input bundle; must provide ``"code"`` and may provide
            ``"filepath"`` in its ``data_dict``.
    """
    # preprocessing input code string first
    code = "\n".join(
        string_util.left_padding_strings(data_bundle["code"].split("\n")))
    # filepath is not mandatory
    filepath = data_bundle.data_dict.get("filepath", "N/A")
    try:
        self._emit(
            DataBundle(data_dict={
                "code": code,
                "filepath": filepath
            }))
    except Exception:
        # Catch ``Exception`` rather than using a bare ``except`` so that
        # KeyboardInterrupt / SystemExit are not silently swallowed.
        logger.error("parse input {} failed".format(code))
    else:
        # Kept outside the ``try`` body so that a logging problem is never
        # misreported as an emit failure.
        logger.debug("parse input {} succeeded".format(code))
def pull(self, blocked=False, timeout=None):
    """Fetch the next item from this edge's queue.

    Args:
        blocked: Forwarded as ``block`` to ``self._queue.get``.
        timeout: Forwarded as ``timeout`` to ``self._queue.get``.

    Returns:
        The item taken from the queue, or ``(None, None)`` when ``get``
        raised (typically because no data was available).
    """
    try:
        # Only the dequeue itself is guarded: once an item has been taken
        # off the queue it must reach the caller and not be dropped by an
        # unrelated failure further down.
        data = self._queue.get(block=blocked, timeout=timeout)
    except Exception:
        # no data available
        # ``Exception`` instead of a bare ``except`` so that Ctrl-C during
        # a blocking ``get`` is not swallowed and reported as "no data".
        logger.debug(
            "pull data in edge failed",
            from_node=self._from_node,
            to_node=self._to_node,
        )
        return (None, None)

    logger.debug(
        "pull data in edge succeeded",
        from_node=self._from_node,
        to_node=self._to_node,
        data=data,
    )
    return data
def schedule(self):
    """Run every task of the computational graph on a thread pool.

    Each task's ``execute`` is submitted to a ``ThreadPoolExecutor`` with
    one worker per CPU core, and the results are then awaited in
    submission order.

    Raises:
        Any exception raised by a task's ``execute``; it is re-raised by
        ``Future.result()`` after the pool has been shut down.
    """
    task_futures = []
    # The context manager guarantees the pool is shut down even when a
    # task fails; previously ``shutdown()`` was skipped on that path.
    with ThreadPoolExecutor(max_workers=multiprocessing.cpu_count()) as pool:
        for task in self._computational_graph.tasks():
            task_futures.append((task, pool.submit(task.execute)))
            logger.debug("schedule task in multi thread scheduler",
                         task=str(task))

        for task, task_future in task_futures:
            # result() blocks until the task finishes and re-raises any
            # exception raised inside it.
            task_future.result()
            logger.debug("task completed in multi thread scheduler",
                         task=str(task))

    logger.debug("shutdown thread pool in multi thread scheduler")
def add_task(self, task):
    """Register ``task`` in ``self._task_set``.

    Args:
        task: The task to add.

    Returns:
        ``self``, so that calls can be chained.
    """
    registry = self._task_set
    registry.add(task)

    logger.debug(
        "add new task",
        task=str(task),
    )
    return self
def schedule(self):
    """Run the graph's tasks one after another on the calling thread.

    Tasks are executed in the order in which
    ``self._computational_graph.tasks()`` yields them (presumably
    topological order -- the ordering is decided by ``tasks()``).
    """
    pending_tasks = self._computational_graph.tasks()
    for current_task in pending_tasks:
        current_task.execute()
        # Emitted only after execute() has returned for this task.
        logger.debug(
            "schedule task in linear scheduler",
            task=str(current_task),
        )

    logger.debug("all tasks completed in linear scheduler")