Example #1
0
    def __init__(self, node_dict):
        """Build the DAG execution state from a serialized node.

        Locates the `_nodes` parameter (the subgraph), indexes subnodes by id,
        forwards DAG-level input values to the special INPUT node, and computes
        per-node unfinished-dependency counts so that ready nodes (dependency
        index 0) can be popped for execution.

        Args:
            node_dict: serialized representation of the DAG node, passed to the
                parent constructor.

        Raises:
            ValueError: if the node has no `_nodes` parameter.
            Exception: if the DAG inputs cannot all be mapped onto the special
                INPUT node's outputs.
        """
        super(DAG, self).__init__(node_dict)

        self.subnodes = None
        # TODO make a function to look for parameter
        for parameter in self.node.parameters:
            if parameter.name == '_nodes':
                self.subnodes = parameter.value.value

        # Explicit raise instead of `assert`: asserts are stripped under -O.
        if self.subnodes is None:
            raise ValueError('Could not find subnodes')

        self.node_id_to_node = {
            node._id: node for node in self.subnodes
        }

        # number of dependencies to ids
        self.dependency_index_to_node_ids = defaultdict(set)
        self.node_id_to_dependents = defaultdict(set)
        self.node_id_to_dependency_index = defaultdict(int)
        self.uncompleted_nodes_count = 0

        self._node_running_status = NodeRunningStatus.READY

        for node in self.subnodes:
            node_id = node._id
            if node_id == SpecialNodeId.INPUT:
                # Forward DAG-level input values onto the INPUT node's outputs.
                updated_resources_count = 0
                for output in node.outputs:
                    for dag_input in self.node.inputs:
                        if dag_input.name == output.name:
                            updated_resources_count += 1
                            output.values = dag_input.values
                if updated_resources_count != len(self.node.inputs):
                    raise Exception('Used {} inputs for {} outputs'.format(updated_resources_count, len(self.node.inputs)))

            # ignore nodes in finished statuses; the OUTPUT node is always kept
            # so DAG-level outputs can be collected at the end of the run
            if NodeRunningStatus.is_finished(node.node_running_status) and node_id != SpecialNodeId.OUTPUT:
                continue
            # Count unfinished upstream dependencies and record reverse edges.
            dependency_index = 0
            for node_input in node.inputs:
                for input_reference in node_input.input_references:
                    dep_node_id = to_object_id(input_reference.node_id)
                    self.node_id_to_dependents[dep_node_id].add(node_id)
                    if not NodeRunningStatus.is_finished(self.node_id_to_node[dep_node_id].node_running_status):
                        dependency_index += 1

            if not NodeRunningStatus.is_finished(node.node_running_status):
                self.uncompleted_nodes_count += 1
            self.dependency_index_to_node_ids[dependency_index].add(node_id)
            self.node_id_to_dependency_index[node_id] = dependency_index

        # Ids of nodes currently running and being polled from the DB.
        self.monitoring_node_ids = set()

        if self.uncompleted_nodes_count == 0:
            self._node_running_status = NodeRunningStatus.SUCCESS
Example #2
0
    def pop_jobs(self):
        """Return a list of nodes whose dependencies are all satisfied.

        Side effects: syncs the statuses of monitored nodes from the database,
        removes finished ones from monitoring, fills the inputs of ready nodes
        from their parents' outputs, marks them IN_QUEUE, applies cached
        results when available, and clears the zero-dependency bucket.

        Returns:
            list: copies of nodes ready to be executed; empty if a node in the
            DAG has failed.
        """
        res = []
        logging.info("Pop jobs")

        # Pull fresh statuses of currently running nodes from the database.
        for running_node_dict in node_collection_manager.get_db_nodes_by_ids(
                self.monitoring_node_ids):
            # check status
            if NodeRunningStatus.is_finished(
                    running_node_dict['node_running_status']):
                node = Node.from_dict(running_node_dict)
                self.update_node(node)
                self.monitoring_node_ids.remove(node._id)

        if NodeRunningStatus.is_failed(self.node.node_running_status):
            logging.info("Job in DAG failed, pop_jobs returns []")
            return res

        cached_nodes = []
        for node_id in self.dependency_index_to_node_ids[0]:
            # Get the node and init its inputs, i.e. fill its resource ids.
            # (This used to be a bare string expression, which is a no-op.)
            orig_node = self.node_id_to_node[node_id]
            for node_input in orig_node.inputs:
                for input_reference in node_input.input_references:
                    node_input.values.extend(self.node_id_to_node[to_object_id(
                        input_reference.node_id)].get_output_by_name(
                            input_reference.output_id).values)
            orig_node.node_running_status = NodeRunningStatus.IN_QUEUE
            node = orig_node.copy()

            if DAG._cacheable(node) and False:  # !!! _cacheable is broken
                try:
                    cache = self.node_cache_manager.get(
                        node, self.graph.author)
                    if cache:
                        # Reuse a previous run's results instead of executing.
                        node.node_running_status = NodeRunningStatus.RESTORED
                        node.outputs = cache.outputs
                        node.logs = cache.logs
                        node.cache_url = '{}/graphs/{}?nid={}'.format(
                            self.WEB_CONFIG.endpoint.rstrip('/'),
                            str(cache.graph_id),
                            str(cache.node_id),
                        )
                        cached_nodes.append(node)
                        continue
                except Exception as err:
                    # Best-effort: a cache failure must not block execution.
                    logging.exception(
                        "Unable to update cache: `{}`".format(err))
            res.append(node)
        # All zero-dependency nodes have been dispatched.
        del self.dependency_index_to_node_ids[0]

        # Restored (cached) nodes are already finished: propagate immediately.
        for node in cached_nodes:
            self.update_node(node)

        return res
Example #3
0
    def run(self):
        """Execute the DAG to completion.

        Repeatedly pops ready jobs and launches them; once finished, copies
        the values gathered by the special OUTPUT node onto the DAG-level
        outputs (only on success).

        Returns:
            JobReturnStatus.SUCCESS on success, JobReturnStatus.FAILED otherwise.

        Raises:
            Exception: if not every OUTPUT-node input maps to a DAG output.
        """
        while not self.finished():
            jobs = self.pop_jobs()
            if not jobs:
                # Nothing ready yet; wait before polling again.
                time.sleep(_GRAPH_ITERATION_SLEEP)
                continue
            for job in jobs:
                self._execute_node(job)

        succeeded = NodeRunningStatus.is_succeeded(
            self.node.node_running_status)
        if succeeded:
            for subnode in self.subnodes:
                if subnode._id != SpecialNodeId.OUTPUT:
                    continue
                # Match OUTPUT-node inputs to DAG outputs by name.
                matched_count = 0
                for dag_output in self.node.outputs:
                    for subnode_input in subnode.inputs:
                        if subnode_input.name == dag_output.name:
                            dag_output.values = subnode_input.values
                            matched_count += 1
                            break
                if matched_count != len(subnode.inputs):
                    raise Exception('Used {} inputs for {} outputs'.format(
                        matched_count, len(subnode.inputs)))
        if succeeded:
            return JobReturnStatus.SUCCESS
        return JobReturnStatus.FAILED
Example #4
0
    def _execute_node(self, node):
        """Persist the node as a run and start monitoring it.

        Nodes already in a finished status (e.g. SPECIAL) are skipped.
        """
        if not NodeRunningStatus.is_finished(node.node_running_status):
            node.save(collection=Collections.RUNS)
            self.monitoring_node_ids.add(node._id)
Example #5
0
    def _execute_node(self, node):
        """Persist the node as a run (owned by the runner) and monitor it.

        Nodes already in a finished status (e.g. SPECIAL) are skipped.
        """
        if NodeRunningStatus.is_finished(node.node_running_status):
            return
        # Attribute the run to the user executing it, not the original author.
        node.author = self.node.author
        node.save(collection=Collections.RUNS)
        self.monitoring_node_ids.add(node._id)
Example #6
0
    def _set_node_status(self, node_id, node_running_status):
        """Record a node's new status and update the DAG's dependency state.

        On FAILED: kills the whole DAG and moves it to FAILED_WAITING.
        On SUCCESS/RESTORED: decrements the dependency index of every
        dependent node (moving it between dependency buckets) and decreases
        the uncompleted-node counter. When no uncompleted nodes remain and
        the DAG has not failed, the DAG itself becomes SUCCESS.
        """
        node = self.node_id_to_node[node_id]
        node.node_running_status = node_running_status

        logging.info("Node running status {} {}".format(node_running_status, node.title))

        if node_running_status == NodeRunningStatus.FAILED:
            # TODO optional cancel based on parameter
            self.kill()
            self._node_running_status = NodeRunningStatus.FAILED_WAITING

        if node_running_status in {NodeRunningStatus.SUCCESS, NodeRunningStatus.RESTORED}:
            for dependent_node_id in self.node_id_to_dependents[node_id]:
                dependent_node = self.node_id_to_node[dependent_node_id]
                prev_dependency_index = self.node_id_to_dependency_index[dependent_node_id]

                # A dependent may reference this node through several inputs;
                # count every satisfied reference, not just one.
                removed_dependencies = 0
                for node_input in dependent_node.inputs:
                    for input_reference in node_input.input_references:
                        if to_object_id(input_reference.node_id) == to_object_id(node_id):
                            removed_dependencies += 1
                dependency_index = prev_dependency_index - removed_dependencies

                # Move the dependent to its new dependency bucket; a node that
                # reaches bucket 0 becomes eligible for pop_jobs.
                self.dependency_index_to_node_ids[prev_dependency_index].remove(dependent_node_id)
                self.dependency_index_to_node_ids[dependency_index].add(dependent_node_id)
                self.node_id_to_dependency_index[dependent_node_id] = dependency_index
            self.uncompleted_nodes_count -= 1

        if self.uncompleted_nodes_count == 0 and not NodeRunningStatus.is_failed(self._node_running_status):
            self._node_running_status = NodeRunningStatus.SUCCESS
Example #7
0
File: worker.py Project: rbax/plynx
 def call_executor_tick(self):
     """Periodically persist the running executor's node while it is active.

     Wakes up every TICK_TIMEOUT seconds (or immediately when the stop event
     is set) and, if the executor reports an update, saves its node to the
     RUNS collection under the executor's lock. Finished nodes are skipped to
     avoid overwriting their final state.
     """
     while not self._stop_event.is_set():
         self._stop_event.wait(timeout=TickThread.TICK_TIMEOUT)
         if self.executor.is_updated():
             # Save logs when operation is running
             with self.executor._lock:
                 if NodeRunningStatus.is_finished(
                         self.executor.node.node_running_status):
                     continue
                 self.executor.node.save(collection=Collections.RUNS)
Example #8
0
    def __init__(self, graph, node_collection=None):
        """Build the scheduler state for a graph.

        Indexes the graph's nodes by id and computes per-node counts of
        unfinished upstream dependencies so ready nodes can be dispatched.

        Args:
            graph: a Graph instance, or a graph id to load from storage.
            node_collection: optional NodeCollection; a new one is created
                when not provided.
        """
        if isinstance(graph, Graph):
            self.graph_id = graph._id
            self.graph = graph
        else:
            self.graph_id = graph
            self.graph = Graph.load(self.graph_id)

        self.node_id_to_node = {
            node._id: node for node in self.graph.nodes
        }

        # number of dependencies to ids
        self.dependency_index_to_node_ids = defaultdict(set)
        self.node_id_to_dependents = defaultdict(set)
        self.node_id_to_dependency_index = defaultdict(int)
        self.uncompleted_nodes_count = 0
        if node_collection:
            self.node_collection = node_collection
        else:
            self.node_collection = NodeCollection()

        for node in self.graph.nodes:
            # ignore nodes in finished statuses
            if NodeRunningStatus.is_finished(node.node_running_status):
                continue
            node_id = node._id
            # Count unfinished upstream dependencies and record reverse edges.
            dependency_index = 0
            for node_input in node.inputs:
                for input_value in node_input.values:
                    parent_node_id = to_object_id(input_value.node_id)
                    self.node_id_to_dependents[parent_node_id].add(node_id)
                    if not NodeRunningStatus.is_finished(self.node_id_to_node[parent_node_id].node_running_status):
                        dependency_index += 1

            # Finished nodes were skipped above, so this node is uncompleted.
            self.uncompleted_nodes_count += 1
            self.dependency_index_to_node_ids[dependency_index].add(node_id)
            self.node_id_to_dependency_index[node_id] = dependency_index
Example #9
0
 def update_node(self, worker_id, node):
     """Return True if the job is in the queue, else False.

     Forwards the node update to the scheduler responsible for the worker's
     graph (if any) and drops the worker's job description once the node
     reaches a finished status.
     """
     job_description = self.get_job_description(worker_id)
     if not job_description:
         logging.info(
             "Scheduler was not found for worker `{}`".format(worker_id))
         return False
     with self._graph_schedulers_lock:
         scheduler = self._graph_id_to_scheduler.get(job_description.graph_id)
         if scheduler:
             scheduler.update_node(node)
     if NodeRunningStatus.is_finished(node.node_running_status):
         # The worker is done with this job; free its slot.
         self._del_job_description(worker_id)
     return True