Пример #1
0
def validate_dependency_debug(graph):
    assert graph, "Graph is empty,please check!!"
    #convert_to_std_list_graph(graph)
    #dag.from_dict(graph)
    #noinspection PyBroadException
    is_valid = False
    dag = DAG()
    try:
        for key, value in graph.items():
            dag.add_node(key)

        for key, value in graph.items():
            for v in value:
                if v is not '':
                    dag.add_edge(key, v)
    except KeyError as e:
        logger.log_error("KeyError while adding to dag, msg is {}".format(e))
    except DAGValidationError as e:
        logger.log_error(
            "DAGValidationError while adding to dag, msg is {},please check dependent relationship"
            .format(e))
    except Exception as e:
        logger.log_error("Exception while adding to dag, msg is {}".format(e))
    else:
        is_valid = True

    assert is_valid, "===> key(testcase):{},value(dependent):{}".format(
        key, value)
Пример #2
0
def generate_dag(optimal_indvidual, stage_name, num_nodes):
    # create nodes for the graph
    nodes = np.empty((0), dtype=np.str)
    for n in range(1, (num_nodes + 1)):
        nodes = np.append(nodes, ''.join([stage_name, "_", str(n)]))

    # initialize directed asyclic graph (DAG) and add nodes to it
    dag = DAG()
    for n in nodes:
        dag.add_node(n)

    # split best indvidual found via GA to identify vertices connections and connect them in DAG
    edges = np.split(optimal_indvidual, np.cumsum(range(num_nodes - 1)))[1:]
    v2 = 2
    for e in edges:
        v1 = 1
        for i in e:
            if i:
                dag.add_edge(''.join([stage_name, "_",
                                      str(v1)]),
                             ''.join([stage_name, "_",
                                      str(v2)]))
            v1 += 1
        v2 += 1

    # delete nodes not connected to anyother node from DAG
    for n in nodes:
        if len(dag.predecessors(n)) == 0 and len(dag.downstream(n)) == 0:
            dag.delete_node(n)
            nodes = np.delete(nodes, np.where(nodes == n)[0][0])

    return dag, nodes
Пример #3
0
def create_random_dag(node_names, node_user_map):
    """
    Randomly generates a DAG graph. Start of by creating a list that represents the hierarchy.
    Each element in the list is another list showing the nodes in that level.
    The number of nodes in each level is random. Then use this hierarchy to create nodes
    and edges between nodes. Edges are created by randomly selecting a node in the previous level
    as a parent.

    Args:
        node_names (list): list of all the nodes to be used
        node_user_map (dict): use node name as a key to get all the users for node
    
    Returns:
        graph (DAG): returns a randomly generated DAG object
    """
    # the number of nodes to create will be the same as the length of the node_names list
    node_num = len(node_names)
    hierarchy = []
    curr_num_of_nodes = 0
    hierarchy.append([curr_num_of_nodes])
    curr_num_of_nodes += 1
    # create a hierarchy for the nodes
    while curr_num_of_nodes < node_num:
        nodes_to_create = random.choice(
            list(range(curr_num_of_nodes, node_num)))
        level = [i for i in range(curr_num_of_nodes, nodes_to_create + 1)]
        curr_num_of_nodes += len(level)
        hierarchy.append(level)

    # create empty graph object without passing in input matrix
    graph = DAG(node_names, node_user_map)

    # use the hierarchy to create the nodes and edges
    for level in range(len(hierarchy)):
        if level == 0:
            graph.add_node(f"Node 0", node_user_map["Node 0"])
        else:
            for num in hierarchy[level]:
                curr_node_name = f"Node {num}"
                graph.add_node(curr_node_name, node_user_map[curr_node_name])
                parent_level = level - 1
                # randomly choose a node a level above in the hierarchy as the parent
                parent_node_num = random.choice(hierarchy[parent_level])
                parent_node_name = f"Node {parent_node_num}"
                graph.add_edge(parent_node_name, curr_node_name)

    # for node in graph.node_list:
    # print(f"node: {node}, edges: {graph.node_list[node].edges.keys()}")
    return graph
Пример #4
0
def generate_dag(optimal_indvidual, stage_name, num_nodes):
    # optimal_individual为本stage的二进制字符串
    # create nodes for the graph
    nodes = np.empty((0), dtype=np.str)
    # 给stage的节点命名,比如s1 stage,节点名字为s1_1,s1_2,...
    for n in range(1, (num_nodes + 1)):
        nodes = np.append(nodes, ''.join([stage_name, "_", str(n)]))

    # initialize directed asyclic graph (DAG) and add nodes to it
    # 加入所有节点
    dag = DAG()
    for n in nodes:
        dag.add_node(n)

    # split best indvidual found via GA to identify vertices connections and connect them in DAG
    # cumsum累积和,cumsum([0, 1, 2, 3])返回[0, 1, 3, 6]
    # 在这里体现为比如有4个node,二进制字符串长度为6,切割成s[:0], s[0:1], s[1:3], s[3:6]
    # 即连接每个节点的二进制字符串
    # 最后再删除第一个节点没有连的数据(上面的s[:0])
    edges = np.split(optimal_indvidual, np.cumsum(range(num_nodes - 1)))[1:]
    v2 = 2
    # 遍历所有节点的连接情况
    for e in edges:
        v1 = 1
        # 遍历这个节点的二进制字符串
        # 如果是1,添加边到dag
        # 这里其实for循环替代v1会好看些
        for i in e:
            if i:
                dag.add_edge(''.join([stage_name, "_",
                                      str(v1)]),
                             ''.join([stage_name, "_",
                                      str(v2)]))
            v1 += 1
        v2 += 1

    # delete nodes not connected to anyother node from DAG
    # 删除孤立的点
    for n in nodes:
        if len(dag.predecessors(n)) == 0 and len(dag.downstream(n)) == 0:
            dag.delete_node(n)
            nodes = np.delete(nodes, np.where(nodes == n)[0][0])

    return dag, nodes
Пример #5
0
 def test_add(self):
     g = DAG()
     g.add_node(1)
     g.add_node(2)
     g.add_node(3, inputs = {1,2})
     g.add_node(0, outputs={1})
     self.assertEqual(g._head_nodes, g.to_nodes({0,2}))
     self.assertEqual(g._leaf_nodes, g.to_nodes({3}))
Пример #6
0
 def make_diamond():
     g = DAG()
     g.add_node(0)
     g.add_node(1, inputs={0})
     g.add_node(2, inputs={0})
     g.add_node(3, inputs={1,2})
     
     return g
Пример #7
0
class SearchSpace(dict):
    """A search space for HyperParameter.

    :param hps: a dict contain HyperParameters, condition and forbidden.
    :type hps: dict, default is `None`.
    """
    def __init__(self, desc=None):
        """Init SearchSpace."""
        super(SearchSpace, self).__init__()
        if desc is None:
            desc = SearchSpaceConfig().to_dict()
            if desc.type is not None:
                desc = ClassFactory.get_cls(ClassType.SEARCHSPACE,
                                            desc.type).get_space(desc)
        for name, item in desc.items():
            self.__setattr__(name, item)
            self.__setitem__(name, item)
        self._params = OrderedDict()
        self._condition_dict = OrderedDict()
        self._forbidden_list = []
        self._hp_count = 0
        self._dag = DAG()
        if desc is not None:
            self.form_desc(desc)

    @classmethod
    def get_space(self, desc):
        """Get Space."""
        return desc

    def form_desc(self, desc):
        """Create SearchSpace base on hyper-parameters object."""
        if 'hyperparameters' not in desc:
            return
        for space_dict in desc["hyperparameters"]:
            param = ParamsFactory.create_search_space(
                param_name=space_dict.get("key"),
                param_slice=space_dict.get('slice'),
                param_type=PARAM_TYPE_MAP[space_dict.get("type").upper()],
                param_range=space_dict.get("range"),
                generator=space_dict.get("generator"),
                sample_num=space_dict.get('sample_num'))
            self.add_hp(param)
        if "condition" in desc:
            for condition in desc["condition"]:
                _condition = ParamsFactory.create_condition(
                    self.get_hp(condition.get("child")),
                    self.get_hp(condition.get("parent")),
                    CONDITION_TYPE_MAP[condition.get("type").upper()],
                    condition.get("range"))
                self.add_condition(_condition)
        if "forbidden" in desc:
            for forbiddens in desc["forbidden"]:
                _forbiddens = []
                for _name, _value in forbiddens.items():
                    _forbiddens.append(
                        ForbiddenEqualsClause(param_name=self.get_hp(_name),
                                              value=_value))
                self.add_forbidden_clause(ForbiddenAndConjunction(_forbiddens))

    def sample(self):
        """Get the Sample of SearchSpace."""
        return self.decode(self.get_sample_space(1)[0])

    def verify_constraints(self, sample):
        """Verify condition."""
        for condition in self.get("condition", []):
            _type = condition["type"]
            child = condition["child"]  # eg. trainer.optimizer.params.momentum
            parent = condition["parent"]  # eg. trainer.optimizer.type
            _range = condition["range"]  # eg. range': ['SGD']
            if _type == "EQUAL" or _type == "IN":
                if parent in sample and sample[parent] in _range:
                    if child not in sample:
                        sample[child] = self.get_hp(child).sample()[0]
                elif child in sample:
                    del sample[child]
            if _type == "NOT_EQUAL":
                if parent in sample and sample[parent] in _range:
                    if child in sample:
                        del sample[child]
                elif child not in sample:
                    sample[child] = self.get_hp(child).sample()[0]
            # TODO condition type: IN, parent type: range
        return sample

    def size(self):
        """Get the size of SearchSpace, also the count of HyperParametera contained in this SearchSpace.

        :return: the size of SearchSpace.
        :rtype: int.

        """
        return self._hp_count

    def add_params(self, params):
        """Add params to the search space.

        :param list prams: List[HyperParameter].
        :return: List of added hyperparameters (same as input)
        :rtype: list

        """
        for param in params:
            if not ParamsFactory.is_params(param):
                raise TypeError("HyperParameter '%s' is not an instance of "
                                "SearchSpace.common.hyper_parameter."
                                "HyperParameter." % str(params))

        for param in params:
            self._add_hp(param)
        self._sort_hps()
        return self

    def add_hp(self, hyperparameter):
        """Add one hyperparameter to the hyperparameter space.

        :param HyperParameter hyperparameter: instance of `HyperParameter` to add.
        :return: hyperparameter (same as input)
        :rtype: HyperParameter

        """
        if not ParamsFactory.is_params(hyperparameter):
            raise TypeError("The method add_hp must be called "
                            "with an instance of SearchSpace."
                            "hyper_parameter.HyperParameter.")

        self._add_hp(hyperparameter)
        return self

    def _add_hp(self, hyperparameter):
        """Add one hyperparameter to the hyperparameter space.

        :param HyperParameter hyperparameter: instance of `HyperParameter` to add.

        """
        if hyperparameter.name in self._params:
            raise ValueError("HyperParameter `%s` is already in SearchSpace!" %
                             hyperparameter.name)
        self._params[hyperparameter.name] = hyperparameter
        self._hp_count = self._hp_count + 1
        self._dag.add_node(hyperparameter.name)

    def add_condition(self, condition):
        """Add new condition to the current SearchSpace.

        :param condition: `condition` that need to add.
        :type condition: instance of `Condition`.
        """
        if not ParamsFactory.is_condition(condition):
            raise ValueError('Not a valid condition {}'.format(condition))
        child_name = condition.child.name
        parent_name = condition.parent.name
        try:
            self._dag.add_edge(parent_name, child_name)
        except KeyError:
            raise KeyError('Hyperparameter in condition {} not exist in'
                           'current SearchSpace.'.format(condition))
        except DAGValidationError:
            raise KeyError('Current condition {} valid DAG rule in current'
                           'SearchSpace, can not be added!'.format(condition))
        if parent_name not in self._condition_dict:
            self._condition_dict[parent_name] = {}
        self._condition_dict[parent_name][child_name] = condition

    def add_forbidden_clause(self, forbidden_conjunction):
        """Add new ForbiddenAndConjunction to the current SearchSpace.

        :param forbidden_conjunction:  ForbiddenAndConjunction
        :type forbidden_conjunction: instance of `ForbiddenAndConjunction`.
        """
        if not isinstance(forbidden_conjunction, ForbiddenAndConjunction):
            raise ValueError(
                'Not a valid condition {}'.format(forbidden_conjunction))
        self._forbidden_list.append(forbidden_conjunction)

    def _sort_hps(self):
        """Sort the hyperparameter dictionary."""
        return

    def params(self):
        """Return the list of all hyperparameters.

        :return: List[HyperParameter]
        :rtype: list

        """
        return list(self._params.values())

    def get_hp_names(self):
        """Return the list of name of all hyperparameters.

        :return: List[str]
        :rtype: list

        """
        return list(self._params.keys())

    def get_hp(self, name):
        """Get HyperParameter by its name.

        :param str name: The name of HyperParameter.
        :return: HyperParameter
        :rtype: HyperParameter

        """
        hp = self._params.get(name)

        if hp is None:
            raise KeyError("HyperParameter '%s' does not exist in this "
                           "configuration space." % name)
        else:
            return hp

    def get_sample_space(self, n=1000, gridding=False):
        """Get the sampled param space from the current SearchSpace.

        :param int n: number of samples.
        :param bool gridding: use gridding sample or random sample.
        :return: shape is (n, len(self._hyperparameters)).
        :rtype: np.array

        """
        if gridding:
            return self._get_grid_sample_space()
        else:
            return self._get_random_sample_space(n)

    def _get_random_sample_space(self, n):
        """Get the sampled param space from the current SearchSpace.

        here we use the random sample, and return a np array of shape
        n*_hp_count, which is a sampled param space for GP or
        other model to predict.

        :param int n: sample count.
        :return: shape is (n, len(self._hyperparameters)).
        :rtype: np.array

        """
        parameters_array = np.zeros((n, self._hp_count))
        i = 0
        for _, hp in self._params.items():
            column = hp.sample(n=n, decode=False)
            parameters_array[:, i] = column
            i = i + 1
        return parameters_array

    def _generate_grid(self):
        """Get the all possible values for each of the tunables."""
        grid_axes = []
        for _, hp in self._params.items():
            grid_axes.append(hp.get_grid_axis(hp.slice))
        return grid_axes

    def _get_grid_sample_space(self):
        """Get the sampled param space from the current SearchSpace.

        here we use the random sample, and return a np array of shape
        n*len(_hyperparameters), which is a sampled param space for GP or
        other model to predict.

        :return: np.array, shape is (n, len(self._hyperparameters)).
        :rtype: np.array

        """
        param_list = [[]]
        params_grid = self._generate_grid()
        for param_grid in params_grid:
            param_list = [
                param_x + [param_y] for param_x in param_list
                for param_y in param_grid
            ]
        return param_list

    def decode(self, param_list):
        """Inverse transform a param list to original param dict.

        :param list param_list: the param list come from a search,
            in which params order are same with self._hyperparameters
        :return: the inverse transformed param dictionary.
        :rtype: dict

        """
        if len(param_list) != self._hp_count:
            raise ValueError(
                "param_list length not equal to SearchSpace size!")
        i = 0
        assigned_forbidden_dict = {}
        inversed_param_dict = {}
        final_param_dict = {}
        for name, hp in self._params.items():
            param_value = param_list[i]

            forbidden_flag = False
            forbidden_value = []
            for forbidden_conjunction in self._forbidden_list:
                if name in forbidden_conjunction._forbidden_dict:
                    forbidden_flag = True

                    total_len = assigned_forbidden_dict.__len__(
                    ) + forbidden_conjunction._forbidden_dict.__len__()
                    union_len = len(
                        set(
                            list(assigned_forbidden_dict.items()) +
                            list(forbidden_conjunction._forbidden_dict.items())
                        ))
                    # if assigned_forbidden_dict has same or similar forbidden conjunction
                    #  with `forbidden_conjunction`.
                    if (total_len - union_len) == \
                            forbidden_conjunction._forbidden_dict.__len__() - 1:
                        forbidden_value.append(
                            forbidden_conjunction._forbidden_dict.get(name))

            inversed_param_dict[name] = hp.decode(param_value, forbidden_value)
            if forbidden_flag:
                assigned_forbidden_dict[name] = inversed_param_dict[name]

            i = i + 1
        # check condition vaild
        # use DAG Breadth-First-Search to check each condition
        q = Queue()
        for ind_name in self._dag.ind_nodes():
            q.put(ind_name)
        while not q.empty():
            parent = q.get()
            final_param_dict[parent] = inversed_param_dict[parent]
            child_list = self._dag.downstream(parent)
            for child in child_list:
                condition = self._condition_dict[parent][child]
                if condition.evaluate(inversed_param_dict[parent]):
                    q.put(child)
        return final_param_dict
Пример #8
0
class HyperparameterSpace(object):
    """A search space for HyperParameter.

    :param hps: a dict contain HyperParameters, condition and forbidden.
    :type hps: dict, default is `None`.
    """
    def __init__(self, hps=None):
        """Init HyperparameterSpace."""
        self._hyperparameters = OrderedDict()
        self._condition_dict = OrderedDict()
        self._forbidden_list = []
        self._hp_count = 0
        self._dag = DAG()
        if hps is not None:
            self._hps2ds(hps)

    @classmethod
    def create(cls, hps):
        """Class method, create hyperparameter space.

        If hyperparameters not in hps, return None
        """
        if not hps:
            return cls()
        if "hyperparameters" not in hps:
            return None
        return cls(hps)

    def _hps2ds(self, hps):
        """Create HyperparameterSpace base on hyper-parameters object."""
        for hp in hps["hyperparameters"]:
            _hp = HyperParameter(
                param_name=hp.get("name"),
                param_slice=hp.get('slice'),
                param_type=PARAM_TYPE_MAP[hp.get("type").upper()],
                param_range=hp.get("range"))
            self.add_hyperparameter(_hp)
        if "condition" in hps:
            for condition in hps["condition"]:
                _condition = Condition(
                    self.get_hyperparameter(condition.get("child")),
                    self.get_hyperparameter(condition.get("parent")),
                    CONDITION_TYPE_MAP[condition.get("type").upper()],
                    condition.get("range"))
                self.add_condition(_condition)
        if "forbidden" in hps:
            for forbiddens in hps["forbidden"]:
                _forbiddens = []
                for _name, _value in forbiddens.items():
                    _forbiddens.append(
                        ForbiddenEqualsClause(
                            param_name=self.get_hyperparameter(_name),
                            value=_value))
                self.add_forbidden_clause(ForbiddenAndConjunction(_forbiddens))

    def size(self):
        """Get the size of HyperparameterSpace, also the count of HyperParametera contained in this HyperparameterSpace.

        :return: the size of HyperparameterSpace.
        :rtype: int.

        """
        return self._hp_count

    def add_hyperparameters(self, hyperparameters):
        """Add hyperparameters to the hyperparameter space.

        :param list hyperparameters: List[HyperParameter].
        :return: List of added hyperparameters (same as input)
        :rtype: list

        """
        for hyperparameter in hyperparameters:
            if not isinstance(hyperparameter, HyperParameter):
                raise TypeError("HyperParameter '%s' is not an instance of "
                                "HyperparameterSpace.common.hyper_parameter."
                                "HyperParameter." % str(hyperparameter))

        for hyperparameter in hyperparameters:
            self._add_hyperparameter(hyperparameter)
        self._sort_hyperparameters()
        return hyperparameters

    def add_hyperparameter(self, hyperparameter):
        """Add one hyperparameter to the hyperparameter space.

        :param HyperParameter hyperparameter: instance of `HyperParameter` to add.
        :return: hyperparameter (same as input)
        :rtype: HyperParameter

        """
        if not isinstance(hyperparameter, HyperParameter):
            raise TypeError("The method add_hyperparameter must be called "
                            "with an instance of HyperparameterSpace.common."
                            "hyper_parameter.HyperParameter.")

        self._add_hyperparameter(hyperparameter)
        return hyperparameter

    def _add_hyperparameter(self, hyperparameter):
        """Add one hyperparameter to the hyperparameter space.

        :param HyperParameter hyperparameter: instance of `HyperParameter` to add.

        """
        if hyperparameter.name in self._hyperparameters:
            raise ValueError(
                "HyperParameter `%s` is already in HyperparameterSpace!" %
                hyperparameter.name)
        self._hyperparameters[hyperparameter.name] = hyperparameter
        self._hp_count = self._hp_count + 1
        self._dag.add_node(hyperparameter.name)

    def add_condition(self, condition):
        """Add new condition to the current HyperparameterSpace.

        :param condition: `condition` that need to add.
        :type condition: instance of `Condition`.
        """
        if not isinstance(condition, Condition):
            raise ValueError('Not a valid condition {}'.format(condition))
        child_name = condition.child.name
        parent_name = condition.parent.name
        try:
            self._dag.add_edge(parent_name, child_name)
        except KeyError:
            raise KeyError('Hyperparameter in condition {} not exist in'
                           'current HyperparameterSpace.'.format(condition))
        except DAGValidationError:
            raise KeyError(
                'Current condition {} valid DAG rule in current'
                'HyperparameterSpace, can not be added!'.format(condition))
        if parent_name not in self._condition_dict:
            self._condition_dict[parent_name] = {}
        self._condition_dict[parent_name][child_name] = condition

    def add_forbidden_clause(self, forbidden_conjunction):
        """Add new ForbiddenAndConjunction to the current HyperparameterSpace.

        :param forbidden_conjunction:  ForbiddenAndConjunction
        :type forbidden_conjunction: instance of `ForbiddenAndConjunction`.
        """
        if not isinstance(forbidden_conjunction, ForbiddenAndConjunction):
            raise ValueError(
                'Not a valid condition {}'.format(forbidden_conjunction))
        self._forbidden_list.append(forbidden_conjunction)

    def _sort_hyperparameters(self):
        """Sort the hyperparameter dictionary."""
        return

    def get_hyperparameters(self):
        """Return the list of all hyperparameters.

        :return: List[HyperParameter]
        :rtype: list

        """
        return list(self._hyperparameters.values())

    def get_hyperparameter_names(self):
        """Return the list of name of all hyperparameters.

        :return: List[str]
        :rtype: list

        """
        return list(self._hyperparameters.keys())

    def get_hyperparameter(self, name):
        """Get HyperParameter by its name.

        :param str name: The name of HyperParameter.
        :return: HyperParameter
        :rtype: HyperParameter

        """
        hp = self._hyperparameters.get(name)

        if hp is None:
            raise KeyError("HyperParameter '%s' does not exist in this "
                           "configuration space." % name)
        else:
            return hp

    def get_sample_space(self, n=1000, gridding=False):
        """Get the sampled param space from the current HyperparameterSpace.

        :param int n: number of samples.
        :param bool gridding: use gridding sample or random sample.
        :return: shape is (n, len(self._hyperparameters)).
        :rtype: np.array

        """
        if gridding:
            return self._get_grid_sample_space()
        else:
            return self._get_random_sample_space(n)

    def _get_random_sample_space(self, n):
        """Get the sampled param space from the current HyperparameterSpace.

        here we use the random sample, and return a np array of shape
        n*_hp_count, which is a sampled param space for GP or
        other model to predict.

        :param int n: sample count.
        :return: shape is (n, len(self._hyperparameters)).
        :rtype: np.array

        """
        parameters_array = np.zeros((n, self._hp_count))
        i = 0
        for _, hp in self._hyperparameters.items():
            low, high = hp.range
            if hp.is_integer:
                column = np.random.randint(low, high + 1, size=n)
            else:
                d = high - low
                column = low + d * np.random.rand(n)
            parameters_array[:, i] = column
            i = i + 1
        return parameters_array

    def _generate_grid(self):
        """Get the all possible values for each of the tunables."""
        grid_axes = []
        for _, hp in self._hyperparameters.items():
            grid_axes.append(hp.get_grid_axis(hp.slice))
        return grid_axes

    def _get_grid_sample_space(self):
        """Get the sampled param space from the current HyperparameterSpace.

        here we use the random sample, and return a np array of shape
        n*len(_hyperparameters), which is a sampled param space for GP or
        other model to predict.

        :return: np.array, shape is (n, len(self._hyperparameters)).
        :rtype: np.array

        """
        param_list = [[]]
        params_grid = self._generate_grid()
        for param_grid in params_grid:
            param_list = [
                param_x + [param_y] for param_x in param_list
                for param_y in param_grid
            ]
        return param_list

    def inverse_transform(self, param_list):
        """Inverse transform a param list to original param dict.

        :param list param_list: the param list come from a search,
            in which params order are same with self._hyperparameters
        :return: the inverse transformed param dictionary.
        :rtype: dict

        """
        if len(param_list) != self._hp_count:
            raise ValueError(
                "param_list length not equal to HyperparameterSpace size!")
        i = 0
        assigned_forbidden_dict = {}
        inversed_param_dict = {}
        final_param_dict = {}
        for name, hp in self._hyperparameters.items():
            param_value = param_list[i]

            forbidden_flag = False
            forbidden_value = []
            for forbidden_conjunction in self._forbidden_list:
                if name in forbidden_conjunction._forbidden_dict:
                    forbidden_flag = True

                    total_len = assigned_forbidden_dict.__len__() + \
                        forbidden_conjunction._forbidden_dict.__len__()
                    union_len = len(
                        set(
                            list(assigned_forbidden_dict.items()) +
                            list(forbidden_conjunction._forbidden_dict.items())
                        ))
                    # if assigned_forbidden_dict has same or similar forbidden conjunction
                    #  with `forbidden_conjunction`.
                    if (total_len - union_len) == \
                            forbidden_conjunction._forbidden_dict.__len__() - 1:
                        forbidden_value.append(
                            forbidden_conjunction._forbidden_dict.get(name))

            inversed_param_dict[name] = \
                hp.inverse_transform(param_value, forbidden_value)
            if forbidden_flag:
                assigned_forbidden_dict[name] = inversed_param_dict[name]

            i = i + 1
        # check condition vaild
        # use DAG Breadth-First-Search to check each condition
        q = Queue()
        for ind_name in self._dag.ind_nodes():
            q.put(ind_name)
        while not q.empty():
            parent = q.get()
            final_param_dict[parent] = inversed_param_dict[parent]
            child_list = self._dag.downstream(parent)
            for child in child_list:
                condition = self._condition_dict[parent][child]
                if condition.evaluate(inversed_param_dict[parent]):
                    q.put(child)
        return final_param_dict
Пример #9
0
class Pipeline:

    def __init__(self, input_file):
        self.cfg = self._read(input_file)

        self.info = self.cfg['pipeline']

        self.owner = self.info['owner']
        self.basename = self.info['basename']
        self.version = self.info['version']
        self.dag = DAG()
        self.stages = {}

        for name in self.info['stages']:
            self.stages[name] = self.load_stage(name)
            self.dag.add_node(name)

        for name in self.info['stages']:
            stage_info = self.cfg[name]
            for parent in stage_info['depends-on']:
                self.dag.add_edge(parent, name)

    def build(self):
        for dirname in self.info['images']:
            os.system("cd {}; make".format(dirname))

    def push(self):
        for dirname in self.info['images']:
            os.system("cd {}; make push".format(dirname))

    def pull(self):
        for dirname in self.info['images']:
            os.system("cd {}; make pull".format(dirname))

    def load_stage(self, name):
        for dirname in self.info['images']:
            dirpath = os.path.join(dirname,name)
            dockerfile_path = os.path.join(dirpath, "Dockerfile")
            run_path = os.path.join(dirpath, "run.py")
            if os.path.isdir(dirpath) and os.path.isfile(dockerfile_path) and os.path.isfile(run_path):
                path = run_path
                break
        else:
            raise PipelineError("""No Stage called {} was found - needs to be in one
                of the images directories and contain Dockerfile, run.py""".format(name))

        # We want to load a module based on a python.  The python people keep changing how to do this in obscure
        # ways.  This one is deprecated but works back in python 3.4 which is what centos 7 can provide.
        loader = importlib.machinery.SourceFileLoader(name, path)
        module = loader.load_module()
        return module.Stage


    def input_tags(self):
        "Return a set of all input tags required by the pipeline and not generated inside it."
        pipeline_inputs = set()
        # Find all the inputs expected by the pipeline
        for stage in self.stages.values():
            pipeline_inputs.update(stage.inputs.keys())
        # Remove any stages that are generated by any step in the pipeline
        for stage in self.stages.values():
            pipeline_inputs.difference_update(stage.outputs.keys())

        return pipeline_inputs

    def output_filenames(self):
        outputs = set()
        # Find all the outputs generated by the pipeline
        for stage in self.stages.values():
            outputs += stage.get_output_filenames()

        return outputs



    def _read(self, input_file):
        "Read a YAML file represnting a pipline"
        if not hasattr(input_file, 'read'):
            input_file = open(input_file)
        info = yaml.load(input_file)
        return info



    def image_name(self, name):
        "Return the expected image name for a given stage based on information in the pipeline file"
        return '{}/{}-{}:{}'.format(self.owner,self.basename, name, self.version)


    def sequence(self):
        "Return an acceptable serial ordering for the pipeline elements"
        order = self.dag.topological_sort()
        return [(name,self.stages[name]) for name in order]


    def dependencies(self, name):
        return self.dag.predecessors(name)