Пример #1
0
def generate_dag(optimal_indvidual, stage_name, num_nodes):
    """Build the DAG of one stage from a flat connection encoding.

    Nodes are named ``<stage_name>_<k>`` for ``k`` in ``1..num_nodes``. The
    encoding is cut into consecutive groups of length ``1, 2, ...``; the
    group belonging to node ``j`` holds one flag per lower-numbered node, and
    a truthy flag at position ``i`` adds the edge ``node i -> node j``.
    Nodes that end up with neither predecessors nor downstream nodes are
    removed from the graph.

    :param optimal_indvidual: flat sequence of connection flags (the best
        individual found by the GA), anything accepted by ``np.split``.
    :param str stage_name: prefix used to build the node names.
    :param int num_nodes: number of nodes of the stage before pruning.
    :return: tuple ``(dag, nodes)`` where ``nodes`` is a NumPy string array
        with the names of the nodes that are still part of ``dag``.
    """
    def node_name(index):
        # Same naming scheme for node creation and for edge end points.
        return ''.join([stage_name, "_", str(index)])

    # ``np.str`` was a plain alias of the builtin ``str`` and has been removed
    # from NumPy; the builtin yields the same unicode dtype on every version.
    nodes = np.array([node_name(n) for n in range(1, num_nodes + 1)],
                     dtype=str)

    # initialize directed acyclic graph (DAG) and add nodes to it
    dag = DAG()
    for n in nodes:
        dag.add_node(n)

    # Split the individual into per-node flag groups. The first piece of the
    # split is always empty (node 1 has no lower-numbered node) and is dropped,
    # so the remaining groups belong to nodes 2, 3, ...
    edges = np.split(optimal_indvidual, np.cumsum(range(num_nodes - 1)))[1:]
    for target, flags in enumerate(edges, start=2):
        for source, flag in enumerate(flags, start=1):
            if flag:
                dag.add_edge(node_name(source), node_name(target))

    # Delete nodes not connected to any other node and report the survivors.
    # A mask is filled while iterating so the array is never resized mid-loop.
    keep = np.ones(len(nodes), dtype=bool)
    for position, n in enumerate(nodes):
        if len(dag.predecessors(n)) == 0 and len(dag.downstream(n)) == 0:
            dag.delete_node(n)
            keep[position] = False

    return dag, nodes[keep]
Пример #2
0
def generate_dag(optimal_indvidual, stage_name, num_nodes):
    """Turn the binary connection string of one stage into a DAG.

    ``optimal_indvidual`` is the binary string of this stage. Every node is
    named ``<stage_name>_<k>`` (for stage ``s1``: ``s1_1``, ``s1_2``, ...).
    The string is split into one group of flags per node; a truthy flag at
    position ``i`` of the group of node ``j`` connects ``node i -> node j``.
    Isolated nodes are deleted afterwards.

    :param optimal_indvidual: flat sequence of connection flags, anything
        accepted by ``np.split``.
    :param str stage_name: prefix of the node names.
    :param int num_nodes: number of nodes of the stage before pruning.
    :return: tuple ``(dag, nodes)``; ``nodes`` is a NumPy string array holding
        the names of the nodes that were kept in ``dag``.
    """
    def node_name(index):
        # Node names are ``<stage_name>_<index>``.
        return ''.join([stage_name, "_", str(index)])

    # create nodes for the graph
    # ``np.str`` no longer exists in NumPy (it was only an alias of ``str``),
    # so the builtin type is used as dtype.
    nodes = np.array([node_name(n) for n in range(1, num_nodes + 1)],
                     dtype=str)

    # initialize directed acyclic graph (DAG) and add all nodes to it
    dag = DAG()
    for n in nodes:
        dag.add_node(n)

    # Split the best individual found via GA to identify the connections.
    # cumsum is the running sum, e.g. cumsum([0, 1, 2]) == [0, 1, 3]; with
    # 4 nodes and a string s of length 6 the pieces are
    # s[:0], s[0:1], s[1:3], s[3:6], i.e. the flags of each node. The empty
    # first piece (node 1 has nothing to connect to) is dropped by ``[1:]``.
    edges = np.split(optimal_indvidual, np.cumsum(range(num_nodes - 1)))[1:]

    # Walk over the flag group of every node (groups start at node 2) and add
    # an edge for every set flag.
    for target, flags in enumerate(edges, start=2):
        for source, flag in enumerate(flags, start=1):
            if flag:
                dag.add_edge(node_name(source), node_name(target))

    # Delete isolated nodes (no predecessors and no downstream nodes). The
    # survivors are selected with a mask instead of shrinking the array while
    # it is being iterated.
    keep = np.ones(len(nodes), dtype=bool)
    for position, n in enumerate(nodes):
        if len(dag.predecessors(n)) == 0 and len(dag.downstream(n)) == 0:
            dag.delete_node(n)
            keep[position] = False

    return dag, nodes[keep]
Пример #3
0
class SearchSpace(dict):
    """A search space for HyperParameter.

    Every entry of the description is stored both as a dict item and as an
    instance attribute; its ``hyperparameters``, ``condition`` and
    ``forbidden`` entries are additionally turned into parameter objects,
    edges of a condition DAG and forbidden clauses.

    :param desc: a dict contain HyperParameters, condition and forbidden.
    :type desc: dict, default is `None`.
    """

    def __init__(self, desc=None):
        """Init SearchSpace.

        :param desc: description of the search space. When `None`, the
            description is read from `SearchSpaceConfig`; if that description
            names a ``type``, it is passed through ``get_space`` of the class
            registered for that type.
        :type desc: dict, default is `None`.
        """
        super(SearchSpace, self).__init__()
        if desc is None:
            desc = SearchSpaceConfig().to_dict()
            if desc.type is not None:
                desc = ClassFactory.get_cls(ClassType.SEARCHSPACE,
                                            desc.type).get_space(desc)
        # Expose each entry of the description as attribute and as dict item.
        for name, item in desc.items():
            setattr(self, name, item)
            self[name] = item
        self._params = OrderedDict()
        self._condition_dict = OrderedDict()
        self._forbidden_list = []
        self._hp_count = 0
        self._dag = DAG()
        # ``desc`` can not be None here (``desc.items()`` was just used).
        self.form_desc(desc)

    @classmethod
    def get_space(cls, desc):
        """Get Space.

        :param desc: description of the search space.
        :return: the description, unchanged.
        """
        return desc

    def form_desc(self, desc):
        """Create SearchSpace base on hyper-parameters object.

        Does nothing when ``desc`` has no ``hyperparameters`` entry. Otherwise
        one parameter is created and added per entry, followed by the optional
        ``condition`` and ``forbidden`` entries.

        :param dict desc: description of the search space.
        """
        if 'hyperparameters' not in desc:
            return
        for space_dict in desc["hyperparameters"]:
            param = ParamsFactory.create_search_space(
                param_name=space_dict.get("key"),
                param_slice=space_dict.get('slice'),
                param_type=PARAM_TYPE_MAP[space_dict.get("type").upper()],
                param_range=space_dict.get("range"),
                generator=space_dict.get("generator"),
                sample_num=space_dict.get('sample_num'))
            self.add_hp(param)
        if "condition" in desc:
            for condition in desc["condition"]:
                _condition = ParamsFactory.create_condition(
                    self.get_hp(condition.get("child")),
                    self.get_hp(condition.get("parent")),
                    CONDITION_TYPE_MAP[condition.get("type").upper()],
                    condition.get("range"))
                self.add_condition(_condition)
        if "forbidden" in desc:
            for forbiddens in desc["forbidden"]:
                # Each entry maps parameter names to the value that is
                # forbidden in combination with the other entries.
                _forbiddens = [
                    ForbiddenEqualsClause(param_name=self.get_hp(_name),
                                          value=_value)
                    for _name, _value in forbiddens.items()
                ]
                self.add_forbidden_clause(ForbiddenAndConjunction(_forbiddens))

    def sample(self):
        """Get the Sample of SearchSpace.

        :return: one randomly sampled parameter list, decoded to a dict.
        :rtype: dict
        """
        return self.decode(self.get_sample_space(1)[0])

    def verify_constraints(self, sample):
        """Verify condition.

        For every entry of ``self["condition"]`` the child key is added to or
        removed from ``sample`` depending on whether the parent value lies in
        the condition range. ``sample`` is modified in place.

        :param dict sample: sampled parameters keyed by parameter name.
        :return: the same ``sample`` object after adjustment.
        :rtype: dict
        """
        for condition in self.get("condition", []):
            _type = condition["type"]
            child = condition["child"]  # eg. trainer.optimizer.params.momentum
            parent = condition["parent"]  # eg. trainer.optimizer.type
            _range = condition["range"]  # eg. range': ['SGD']
            if _type in ("EQUAL", "IN"):
                # Child is only meaningful while the parent is in range.
                if parent in sample and sample[parent] in _range:
                    if child not in sample:
                        sample[child] = self.get_hp(child).sample()[0]
                elif child in sample:
                    del sample[child]
            elif _type == "NOT_EQUAL":
                # Child is only meaningful while the parent is out of range.
                if parent in sample and sample[parent] in _range:
                    if child in sample:
                        del sample[child]
                elif child not in sample:
                    sample[child] = self.get_hp(child).sample()[0]
            # TODO condition type: IN, parent type: range
        return sample

    def size(self):
        """Get the size of SearchSpace, also the count of HyperParametera contained in this SearchSpace.

        :return: the size of SearchSpace.
        :rtype: int.

        """
        return self._hp_count

    def add_params(self, params):
        """Add params to the search space.

        All params are validated before the first one is added.

        :param list params: List[HyperParameter].
        :return: this search space, so calls can be chained.
        :rtype: SearchSpace
        :raises TypeError: if an element is not accepted by
            ``ParamsFactory.is_params``.

        """
        for param in params:
            if not ParamsFactory.is_params(param):
                # Report the offending element, not the whole input list.
                raise TypeError("HyperParameter '%s' is not an instance of "
                                "SearchSpace.common.hyper_parameter."
                                "HyperParameter." % str(param))

        for param in params:
            self._add_hp(param)
        self._sort_hps()
        return self

    def add_hp(self, hyperparameter):
        """Add one hyperparameter to the hyperparameter space.

        :param HyperParameter hyperparameter: instance of `HyperParameter` to add.
        :return: this search space, so calls can be chained.
        :rtype: SearchSpace
        :raises TypeError: if ``hyperparameter`` is not accepted by
            ``ParamsFactory.is_params``.

        """
        if not ParamsFactory.is_params(hyperparameter):
            raise TypeError("The method add_hp must be called "
                            "with an instance of SearchSpace."
                            "hyper_parameter.HyperParameter.")

        self._add_hp(hyperparameter)
        return self

    def _add_hp(self, hyperparameter):
        """Add one hyperparameter to the hyperparameter space.

        Registers the parameter by name, increases the parameter count and
        adds a node of the same name to the condition DAG.

        :param HyperParameter hyperparameter: instance of `HyperParameter` to add.
        :raises ValueError: if a parameter of the same name already exists.

        """
        if hyperparameter.name in self._params:
            raise ValueError("HyperParameter `%s` is already in SearchSpace!" %
                             hyperparameter.name)
        self._params[hyperparameter.name] = hyperparameter
        self._hp_count = self._hp_count + 1
        self._dag.add_node(hyperparameter.name)

    def add_condition(self, condition):
        """Add new condition to the current SearchSpace.

        The condition becomes an edge ``parent -> child`` of the DAG and is
        stored under ``_condition_dict[parent][child]``.

        :param condition: `condition` that need to add.
        :type condition: instance of `Condition`.
        :raises ValueError: if ``condition`` is not accepted by
            ``ParamsFactory.is_condition``.
        :raises KeyError: if the DAG rejects the edge, either with a
            ``KeyError`` or with a ``DAGValidationError``.
        """
        if not ParamsFactory.is_condition(condition):
            raise ValueError('Not a valid condition {}'.format(condition))
        child_name = condition.child.name
        parent_name = condition.parent.name
        try:
            self._dag.add_edge(parent_name, child_name)
        except KeyError:
            raise KeyError('Hyperparameter in condition {} not exist in '
                           'current SearchSpace.'.format(condition))
        except DAGValidationError:
            raise KeyError('Current condition {} violates DAG rule in current '
                           'SearchSpace, can not be added!'.format(condition))
        if parent_name not in self._condition_dict:
            self._condition_dict[parent_name] = {}
        self._condition_dict[parent_name][child_name] = condition

    def add_forbidden_clause(self, forbidden_conjunction):
        """Add new ForbiddenAndConjunction to the current SearchSpace.

        :param forbidden_conjunction:  ForbiddenAndConjunction
        :type forbidden_conjunction: instance of `ForbiddenAndConjunction`.
        :raises ValueError: if the argument is not a `ForbiddenAndConjunction`.
        """
        if not isinstance(forbidden_conjunction, ForbiddenAndConjunction):
            raise ValueError(
                'Not a valid condition {}'.format(forbidden_conjunction))
        self._forbidden_list.append(forbidden_conjunction)

    def _sort_hps(self):
        """Sort the hyperparameter dictionary (currently a no-op)."""
        return

    def params(self):
        """Return the list of all hyperparameters.

        :return: List[HyperParameter]
        :rtype: list

        """
        return list(self._params.values())

    def get_hp_names(self):
        """Return the list of name of all hyperparameters.

        :return: List[str]
        :rtype: list

        """
        return list(self._params.keys())

    def get_hp(self, name):
        """Get HyperParameter by its name.

        :param str name: The name of HyperParameter.
        :return: HyperParameter
        :rtype: HyperParameter
        :raises KeyError: if no parameter is stored under ``name``.

        """
        hp = self._params.get(name)
        if hp is None:
            raise KeyError("HyperParameter '%s' does not exist in this "
                           "configuration space." % name)
        return hp

    def get_sample_space(self, n=1000, gridding=False):
        """Get the sampled param space from the current SearchSpace.

        :param int n: number of samples, only used for random sampling.
        :param bool gridding: use gridding sample or random sample.
        :return: for random sampling an array of shape
            (n, number of hyperparameters); for gridding a list of
            parameter lists.
        :rtype: np.array or list

        """
        if gridding:
            return self._get_grid_sample_space()
        return self._get_random_sample_space(n)

    def _get_random_sample_space(self, n):
        """Get the sampled param space from the current SearchSpace.

        here we use the random sample, and return a np array of shape
        n*_hp_count, which is a sampled param space for GP or
        other model to predict.

        :param int n: sample count.
        :return: shape is (n, number of hyperparameters).
        :rtype: np.array

        """
        parameters_array = np.zeros((n, self._hp_count))
        # One column per hyperparameter, in insertion order.
        for i, hp in enumerate(self._params.values()):
            parameters_array[:, i] = hp.sample(n=n, decode=False)
        return parameters_array

    def _generate_grid(self):
        """Get the all possible values for each of the tunables.

        :return: one grid axis per hyperparameter, in insertion order.
        :rtype: list
        """
        return [hp.get_grid_axis(hp.slice) for hp in self._params.values()]

    def _get_grid_sample_space(self):
        """Get the gridded param space from the current SearchSpace.

        The result is the cartesian product of the grid axes of all
        hyperparameters; the first hyperparameter varies slowest.

        :return: list of parameter lists, each of length
            number of hyperparameters.
        :rtype: list

        """
        param_list = [[]]
        for param_grid in self._generate_grid():
            # Extend every partial combination by every value of this axis.
            param_list = [
                param_x + [param_y] for param_x in param_list
                for param_y in param_grid
            ]
        return param_list

    def decode(self, param_list):
        """Inverse transform a param list to original param dict.

        Parameters are decoded in insertion order. While doing so, a value
        forbidden by a conjunction is handed to ``hp.decode`` once all other
        clauses of that conjunction already match the values decoded so far.
        Afterwards the condition DAG is walked breadth-first so that only
        parameters whose parent conditions evaluate truthy are returned.

        :param list param_list: the param list come from a search,
            in which params order are same with the stored hyperparameters.
        :return: the inverse transformed param dictionary.
        :rtype: dict
        :raises ValueError: if ``param_list`` does not have one entry per
            hyperparameter.

        """
        if len(param_list) != self._hp_count:
            raise ValueError(
                "param_list length not equal to SearchSpace size!")
        assigned_forbidden_dict = {}
        inversed_param_dict = {}
        final_param_dict = {}
        for i, (name, hp) in enumerate(self._params.items()):
            param_value = param_list[i]

            forbidden_flag = False
            forbidden_value = []
            for forbidden_conjunction in self._forbidden_list:
                forbidden_dict = forbidden_conjunction._forbidden_dict
                if name in forbidden_dict:
                    forbidden_flag = True

                    total_len = len(assigned_forbidden_dict) + \
                        len(forbidden_dict)
                    union_len = len(
                        set(
                            list(assigned_forbidden_dict.items()) +
                            list(forbidden_dict.items())
                        ))
                    # total_len - union_len is the number of (name, value)
                    # pairs shared by both dicts. If only one clause of the
                    # conjunction is still unmatched, its value for this
                    # parameter has to be avoided.
                    if (total_len - union_len) == len(forbidden_dict) - 1:
                        forbidden_value.append(forbidden_dict.get(name))

            inversed_param_dict[name] = hp.decode(param_value, forbidden_value)
            if forbidden_flag:
                assigned_forbidden_dict[name] = inversed_param_dict[name]

        # check condition valid
        # use DAG Breadth-First-Search to check each condition
        q = Queue()
        for ind_name in self._dag.ind_nodes():
            q.put(ind_name)
        while not q.empty():
            parent = q.get()
            final_param_dict[parent] = inversed_param_dict[parent]
            for child in self._dag.downstream(parent):
                condition = self._condition_dict[parent][child]
                # A child is only reached when its condition holds for the
                # decoded value of the parent.
                if condition.evaluate(inversed_param_dict[parent]):
                    q.put(child)
        return final_param_dict
Пример #4
0
class HyperparameterSpace(object):
    """A search space for HyperParameter.

    Hyperparameters are kept in insertion order; conditions between them are
    stored as edges ``parent -> child`` of a DAG, and forbidden value
    combinations as a list of conjunctions.

    :param hps: a dict contain HyperParameters, condition and forbidden.
    :type hps: dict, default is `None`.
    """

    def __init__(self, hps=None):
        """Init HyperparameterSpace.

        :param hps: description of the space; an empty space is created
            when it is `None`.
        :type hps: dict, default is `None`.
        """
        self._hyperparameters = OrderedDict()
        self._condition_dict = OrderedDict()
        self._forbidden_list = []
        self._hp_count = 0
        self._dag = DAG()
        if hps is not None:
            self._hps2ds(hps)

    @classmethod
    def create(cls, hps):
        """Class method, create hyperparameter space.

        :param hps: description of the space.
        :return: an empty space when ``hps`` is falsy, `None` when ``hps``
            has no ``hyperparameters`` entry, otherwise the space built
            from ``hps``.
        """
        if not hps:
            return cls()
        if "hyperparameters" not in hps:
            return None
        return cls(hps)

    def _hps2ds(self, hps):
        """Create HyperparameterSpace base on hyper-parameters object.

        Adds one hyperparameter per entry of ``hps["hyperparameters"]``,
        followed by the optional ``condition`` and ``forbidden`` entries.

        :param dict hps: description of the space.
        """
        for hp in hps["hyperparameters"]:
            _hp = HyperParameter(
                param_name=hp.get("name"),
                param_slice=hp.get('slice'),
                param_type=PARAM_TYPE_MAP[hp.get("type").upper()],
                param_range=hp.get("range"))
            self.add_hyperparameter(_hp)
        if "condition" in hps:
            for condition in hps["condition"]:
                _condition = Condition(
                    self.get_hyperparameter(condition.get("child")),
                    self.get_hyperparameter(condition.get("parent")),
                    CONDITION_TYPE_MAP[condition.get("type").upper()],
                    condition.get("range"))
                self.add_condition(_condition)
        if "forbidden" in hps:
            for forbiddens in hps["forbidden"]:
                # Each entry maps parameter names to the value that is
                # forbidden in combination with the other entries.
                _forbiddens = [
                    ForbiddenEqualsClause(
                        param_name=self.get_hyperparameter(_name),
                        value=_value)
                    for _name, _value in forbiddens.items()
                ]
                self.add_forbidden_clause(ForbiddenAndConjunction(_forbiddens))

    def size(self):
        """Get the size of HyperparameterSpace, also the count of HyperParametera contained in this HyperparameterSpace.

        :return: the size of HyperparameterSpace.
        :rtype: int.

        """
        return self._hp_count

    def add_hyperparameters(self, hyperparameters):
        """Add hyperparameters to the hyperparameter space.

        All elements are validated before the first one is added.

        :param list hyperparameters: List[HyperParameter].
        :return: List of added hyperparameters (same as input)
        :rtype: list
        :raises TypeError: if an element is not a `HyperParameter`.

        """
        for hyperparameter in hyperparameters:
            if not isinstance(hyperparameter, HyperParameter):
                raise TypeError("HyperParameter '%s' is not an instance of "
                                "HyperparameterSpace.common.hyper_parameter."
                                "HyperParameter." % str(hyperparameter))

        for hyperparameter in hyperparameters:
            self._add_hyperparameter(hyperparameter)
        self._sort_hyperparameters()
        return hyperparameters

    def add_hyperparameter(self, hyperparameter):
        """Add one hyperparameter to the hyperparameter space.

        :param HyperParameter hyperparameter: instance of `HyperParameter` to add.
        :return: hyperparameter (same as input)
        :rtype: HyperParameter
        :raises TypeError: if ``hyperparameter`` is not a `HyperParameter`.

        """
        if not isinstance(hyperparameter, HyperParameter):
            raise TypeError("The method add_hyperparameter must be called "
                            "with an instance of HyperparameterSpace.common."
                            "hyper_parameter.HyperParameter.")

        self._add_hyperparameter(hyperparameter)
        return hyperparameter

    def _add_hyperparameter(self, hyperparameter):
        """Add one hyperparameter to the hyperparameter space.

        Registers the parameter by name, increases the parameter count and
        adds a node of the same name to the condition DAG.

        :param HyperParameter hyperparameter: instance of `HyperParameter` to add.
        :raises ValueError: if a parameter of the same name already exists.

        """
        if hyperparameter.name in self._hyperparameters:
            raise ValueError(
                "HyperParameter `%s` is already in HyperparameterSpace!" %
                hyperparameter.name)
        self._hyperparameters[hyperparameter.name] = hyperparameter
        self._hp_count = self._hp_count + 1
        self._dag.add_node(hyperparameter.name)

    def add_condition(self, condition):
        """Add new condition to the current HyperparameterSpace.

        The condition becomes an edge ``parent -> child`` of the DAG and is
        stored under ``_condition_dict[parent][child]``.

        :param condition: `condition` that need to add.
        :type condition: instance of `Condition`.
        :raises ValueError: if ``condition`` is not a `Condition`.
        :raises KeyError: if the DAG rejects the edge, either with a
            ``KeyError`` or with a ``DAGValidationError``.
        """
        if not isinstance(condition, Condition):
            raise ValueError('Not a valid condition {}'.format(condition))
        child_name = condition.child.name
        parent_name = condition.parent.name
        try:
            self._dag.add_edge(parent_name, child_name)
        except KeyError:
            raise KeyError('Hyperparameter in condition {} not exist in '
                           'current HyperparameterSpace.'.format(condition))
        except DAGValidationError:
            raise KeyError(
                'Current condition {} violates DAG rule in current '
                'HyperparameterSpace, can not be added!'.format(condition))
        if parent_name not in self._condition_dict:
            self._condition_dict[parent_name] = {}
        self._condition_dict[parent_name][child_name] = condition

    def add_forbidden_clause(self, forbidden_conjunction):
        """Add new ForbiddenAndConjunction to the current HyperparameterSpace.

        :param forbidden_conjunction:  ForbiddenAndConjunction
        :type forbidden_conjunction: instance of `ForbiddenAndConjunction`.
        :raises ValueError: if the argument is not a `ForbiddenAndConjunction`.
        """
        if not isinstance(forbidden_conjunction, ForbiddenAndConjunction):
            raise ValueError(
                'Not a valid condition {}'.format(forbidden_conjunction))
        self._forbidden_list.append(forbidden_conjunction)

    def _sort_hyperparameters(self):
        """Sort the hyperparameter dictionary (currently a no-op)."""
        return

    def get_hyperparameters(self):
        """Return the list of all hyperparameters.

        :return: List[HyperParameter]
        :rtype: list

        """
        return list(self._hyperparameters.values())

    def get_hyperparameter_names(self):
        """Return the list of name of all hyperparameters.

        :return: List[str]
        :rtype: list

        """
        return list(self._hyperparameters.keys())

    def get_hyperparameter(self, name):
        """Get HyperParameter by its name.

        :param str name: The name of HyperParameter.
        :return: HyperParameter
        :rtype: HyperParameter
        :raises KeyError: if no hyperparameter is stored under ``name``.

        """
        hp = self._hyperparameters.get(name)
        if hp is None:
            raise KeyError("HyperParameter '%s' does not exist in this "
                           "configuration space." % name)
        return hp

    def get_sample_space(self, n=1000, gridding=False):
        """Get the sampled param space from the current HyperparameterSpace.

        :param int n: number of samples, only used for random sampling.
        :param bool gridding: use gridding sample or random sample.
        :return: for random sampling an array of shape
            (n, len(self._hyperparameters)); for gridding a list of
            parameter lists.
        :rtype: np.array or list

        """
        if gridding:
            return self._get_grid_sample_space()
        return self._get_random_sample_space(n)

    def _get_random_sample_space(self, n):
        """Get the sampled param space from the current HyperparameterSpace.

        here we use the random sample, and return a np array of shape
        n*_hp_count, which is a sampled param space for GP or
        other model to predict.

        :param int n: sample count.
        :return: shape is (n, len(self._hyperparameters)).
        :rtype: np.array

        """
        parameters_array = np.zeros((n, self._hp_count))
        # One column per hyperparameter, in insertion order.
        for i, hp in enumerate(self._hyperparameters.values()):
            low, high = hp.range
            if hp.is_integer:
                # randint excludes the upper bound, hence ``high + 1``.
                column = np.random.randint(low, high + 1, size=n)
            else:
                column = low + (high - low) * np.random.rand(n)
            parameters_array[:, i] = column
        return parameters_array

    def _generate_grid(self):
        """Get the all possible values for each of the tunables.

        :return: one grid axis per hyperparameter, in insertion order.
        :rtype: list
        """
        return [
            hp.get_grid_axis(hp.slice)
            for hp in self._hyperparameters.values()
        ]

    def _get_grid_sample_space(self):
        """Get the gridded param space from the current HyperparameterSpace.

        The result is the cartesian product of the grid axes of all
        hyperparameters; the first hyperparameter varies slowest.

        :return: list of parameter lists, each of length
            len(self._hyperparameters).
        :rtype: list

        """
        param_list = [[]]
        for param_grid in self._generate_grid():
            # Extend every partial combination by every value of this axis.
            param_list = [
                param_x + [param_y] for param_x in param_list
                for param_y in param_grid
            ]
        return param_list

    def inverse_transform(self, param_list):
        """Inverse transform a param list to original param dict.

        Parameters are transformed in insertion order. While doing so, a
        value forbidden by a conjunction is handed to
        ``hp.inverse_transform`` once all other clauses of that conjunction
        already match the values transformed so far. Afterwards the condition
        DAG is walked breadth-first so that only parameters whose parent
        conditions evaluate truthy are returned.

        :param list param_list: the param list come from a search,
            in which params order are same with self._hyperparameters
        :return: the inverse transformed param dictionary.
        :rtype: dict
        :raises ValueError: if ``param_list`` does not have one entry per
            hyperparameter.

        """
        if len(param_list) != self._hp_count:
            raise ValueError(
                "param_list length not equal to HyperparameterSpace size!")
        assigned_forbidden_dict = {}
        inversed_param_dict = {}
        final_param_dict = {}
        for i, (name, hp) in enumerate(self._hyperparameters.items()):
            param_value = param_list[i]

            forbidden_flag = False
            forbidden_value = []
            for forbidden_conjunction in self._forbidden_list:
                forbidden_dict = forbidden_conjunction._forbidden_dict
                if name in forbidden_dict:
                    forbidden_flag = True

                    total_len = len(assigned_forbidden_dict) + \
                        len(forbidden_dict)
                    union_len = len(
                        set(
                            list(assigned_forbidden_dict.items()) +
                            list(forbidden_dict.items())
                        ))
                    # total_len - union_len is the number of (name, value)
                    # pairs shared by both dicts. If only one clause of the
                    # conjunction is still unmatched, its value for this
                    # parameter has to be avoided.
                    if (total_len - union_len) == len(forbidden_dict) - 1:
                        forbidden_value.append(forbidden_dict.get(name))

            inversed_param_dict[name] = \
                hp.inverse_transform(param_value, forbidden_value)
            if forbidden_flag:
                assigned_forbidden_dict[name] = inversed_param_dict[name]

        # check condition valid
        # use DAG Breadth-First-Search to check each condition
        q = Queue()
        for ind_name in self._dag.ind_nodes():
            q.put(ind_name)
        while not q.empty():
            parent = q.get()
            final_param_dict[parent] = inversed_param_dict[parent]
            for child in self._dag.downstream(parent):
                condition = self._condition_dict[parent][child]
                # A child is only reached when its condition holds for the
                # transformed value of the parent.
                if condition.evaluate(inversed_param_dict[parent]):
                    q.put(child)
        return final_param_dict