示例#1
0
    def objective_training(self,
                           reference_vectors: list,
                           graph_type: GraphType = None):
        """
        Run episodes until the best hypervolume of the initial state matches the
        hypervolume of the reference vectors.

        Both values are compared with `math.isclose`, using
        `Vector.decimal_precision` as relative tolerance. The loop has no other
        exit condition, so this call only returns once that match is reached.

        :param reference_vectors: vectors whose hypervolume (with respect to
            `self.hv_reference`) is the value to reach.
        :param graph_type: forwarded unchanged to `self.episode`.
        :return: None
        """

        def initial_state_hypervolume():
            # Best hypervolume currently reachable from the initial state.
            return self._best_hypervolume(self.environment.initial_state)

        # Hypervolume reached so far.
        reached = initial_state_hypervolume()

        # Hypervolume to reach.
        target = uh.calc_hypervolume(vectors=reference_vectors,
                                     reference=self.hv_reference)

        while True:
            # Stop as soon as both hypervolumes agree within the tolerance.
            if math.isclose(reached, target,
                            rel_tol=Vector.decimal_precision):
                return

            # Otherwise train one more episode and measure again.
            self.episode(graph_type=graph_type)
            reached = initial_state_hypervolume()
示例#2
0
    def calculate_hypervolume(self):
        """
        Calc the hypervolume of the Q-set of every action in `self.state`.

        :return: a tuple (maximum-hv, [(action, hypervolume)*], summation-hv).
            With an empty action space this is (-inf, [], 0).
        """

        per_action = []
        best = float('-inf')
        total = 0

        for action in self.environment.action_space:
            # Hypervolume of Q_set(state, action) with respect to the reference.
            volume = uh.calc_hypervolume(
                vectors=self.q_set(state=self.state, action=action),
                reference=self.hv_reference)

            per_action.append((action, volume))

            # Keep track of the largest hypervolume and of the running sum.
            best = max(best, volume)
            total += volume

        return best, per_action, total
示例#3
0
    def _best_hypervolume(self, state: object = None) -> float:
        """
        Return the best (maximum) hypervolume among the Q-sets of every action
        for the given state.

        While the Q-sets are being read, `self.environment.current_state` is
        temporarily set to `state`; the previous value is always restored, even
        if an error is raised in between.

        :param state: state to evaluate. Only `None` means "use the current
            state of the environment"; falsy states such as `0` are evaluated
            as given.
        :return: maximum hypervolume over all actions.
        :raises ValueError: if the action space is empty (`max` of an empty
            list).
        """

        # Only a missing state falls back to the current one. A truthiness
        # check would also discard valid falsy states (e.g. 0).
        if state is None:
            state = self.environment.current_state

        # Save previous position
        previous_state = self.environment.current_state
        self.environment.current_state = state

        try:
            # Hypervolume of Q_set(state, a), with reference given, per action.
            hv = [
                uh.calc_hypervolume(vectors=self.q_set(state=state, action=a),
                                    reference=self.hv_reference)
                for a in self.environment.action_space
            ]
        finally:
            # Restore environment correct position, whatever happened above.
            self.environment.current_state = previous_state

        return max(hv)
示例#4
0
    def chv_evaluation(self, state: object) -> int:
        """
        Choose an action for the given state according to the hypervolume of
        the vectors kept for each action.

        The Q-sets of all actions are gathered as `IndexVector`s (indexed by
        action), passed through
        `IndexVector.actions_occurrences_based_m3_with_repetitions` and the
        hypervolume of the vectors returned for each action is calculated
        (0.0 when an action keeps no vectors). One of the actions with maximum
        hypervolume is returned, chosen with `self.generator.choice`.

        CAUTION: This method assumes actions are integers in a range.

        :param state: state whose actions are evaluated.
        :return: one of the actions with maximum hypervolume.
        """

        # Getting action_space
        action_space = self.environment.action_space

        # Every Q of every Q_set(state, a), tagged with the action it belongs to
        indexed_vectors = [
            IndexVector(index=action, vector=q)
            for action in action_space
            for q in self.q_set(state=state, action=action)
        ]

        # Vectors kept for each action (returns_vectors=True asks for the
        # vectors themselves).
        kept_by_action = IndexVector.actions_occurrences_based_m3_with_repetitions(
            vectors=indexed_vectors,
            actions=action_space,
            returns_vectors=True)

        # Hypervolume associated to each action
        chv_by_action = dict()

        for action, kept in kept_by_action.items():
            if len(kept) > 0:
                chv_by_action[action] = uh.calc_hypervolume(
                    vectors=kept, reference=self.hv_reference)
            else:
                # Actions without vectors get a zero hypervolume
                chv_by_action[action] = 0.0

        # Largest hypervolume found
        best = max(chv_by_action.values())

        # Every action reaching that hypervolume
        candidates = [
            action for action, chv in chv_by_action.items() if chv == best
        ]

        # Ties are broken randomly
        return self.generator.choice(candidates)
示例#5
0
def hv_graph(data: dict):
    """
    Dump a MATLAB script ('article/output/hv.m', next to this file) that plots
    one hypervolume line per label.

    `data` is read as `{columns: {label: information}}`, where
    `information['vectors']['(0, 0)']` is an iterable of items convertible to
    `Vector`. For every (label, columns) pair the hypervolume of those vectors
    is calculated against the module-level `vector_reference`. Line colour and
    marker of each label are taken from the module-level `line_config`.

    The whole script is built in memory before the file is opened, so a failure
    while calculating hypervolumes does not leave a truncated file behind.

    :param data: nested dictionary described above.
    :return: None
    """

    # Prepare hypervolume to dumps data
    hv_file = Path(__file__).parent.joinpath('article/output/hv.m')

    # If any parents doesn't exist, make it.
    hv_file.parent.mkdir(parents=True, exist_ok=True)

    # label -> {columns: hypervolume}
    labels = dict()

    for columns, values in data.items():

        for label, information in values.items():
            # Convert to vectors
            vectors = list(map(Vector, information['vectors']['(0, 0)']))

            # Calculate hypervolume
            element = uh.calc_hypervolume(vectors=vectors,
                                          reference=vector_reference)

            # Store it under its label, keeping an already stored value.
            labels.setdefault(label, dict()).setdefault(columns, element)

    # Pieces of the script, joined once at the end.
    script = [
        'figure;\n',
        'hold on;\n\n',
        "title('Hypervolume');\n\n",
    ]

    for label, information in labels.items():
        script.append('X = [{}];\n'.format(', '.join(
            map(str, information.keys()))))
        script.append('Y = [{}];\n'.format(', '.join(
            map(str, information.values()))))
        script.append(
            "plot(X, Y, 'Color', '{}', 'Marker', '{}');\n\n".format(
                line_config[label]['color'], line_config[label]['marker']))

    # MATLAB's axis-label functions are `xlabel` / `ylabel`; `x_label` and
    # `y_label` are undefined there and abort the generated script.
    script.append("xlabel('# of diagonals');\n")
    script.append("ylabel('Hypervolume');\n")
    script.append("\n")
    script.append('legend({});\n'.format(', '.join(
        "'{}'".format(label) for label in labels.keys())))
    script.append('hold off;\n')

    with hv_file.open(mode='w', encoding='UTF-8') as file:
        file.write(''.join(script))
0
    def calculate_chv(self):
        """
        Calc, for every action in `self.state`, the hypervolume of the vectors
        that `IndexVector.actions_occurrences_based_m3_with_repetitions` keeps
        for that action.

        CAUTION: This method assumes actions are integers in a range (each
        action is used to index the result of that call).

        :return: a tuple (maximum_chv, list of tuples (action, chv), sum of
            chv). Actions without vectors contribute a chv of 0; with no
            actions at all the maximum is -1.
        """

        # Getting action_space
        action_space = self.environment.action_space

        # Every Q of every Q_set(state, a), tagged with the action it belongs to
        indexed_vectors = [
            IndexVector(index=action, vector=q)
            for action in action_space
            for q in self.q_set(state=self.state, action=action)
        ]

        # Vectors kept for each action (returns_vectors=True asks for the
        # vectors themselves).
        kept = IndexVector.actions_occurrences_based_m3_with_repetitions(
            vectors=indexed_vectors,
            actions=action_space,
            returns_vectors=True)

        per_action = []
        best = -1
        total = 0

        for action in action_space:
            candidates = kept[action]

            # No vectors for this action means a zero hypervolume
            chv = uh.calc_hypervolume(
                vectors=candidates,
                reference=self.hv_reference) if len(candidates) > 0 else 0

            per_action.append((action, chv))
            total += chv

            if chv > best:
                best = chv

        return best, per_action, total
示例#7
0
    def _best_hypervolume(self, state: object = None) -> float:
        """
        Return the hypervolume of the vectors stored in `self.v` for the given
        state.

        :param state: state to evaluate. Only `None` means "use the current
            state of the environment"; falsy states such as `0` are evaluated
            as given.
        :return: hypervolume of the stored vectors with respect to
            `self.hv_reference`. When nothing is stored for the state,
            `self.initial_q_value` is used as the only vector.
        """

        # Only a missing state falls back to the current one. A truthiness
        # check would also discard valid falsy states (e.g. 0).
        if state is None:
            state = self.environment.current_state

        # Vectors stored for this state; default is the initial value when
        # there are none.
        v = list(self.v.get(state, {}).values()) or [self.initial_q_value]

        # Getting hypervolume
        return uh.calc_hypervolume(vectors=v, reference=self.hv_reference)
示例#8
0
    def hypervolume_evaluation(self, state: object) -> int:
        """
        Choose an action for the given state by the hypervolume of its Q-set.
        (HV-PQL)

        For each action the vectors stored in `self.q[state][action]` are used
        (a single `self.initial_q_value` vector when nothing is stored). Actions
        whose hypervolume is close (`math.isclose`) to the best one seen so far
        are treated as ties, and one of the tied actions is returned using
        `self.generator.choice`.

        :param state: state whose actions are evaluated.
        :return: one of the actions with (approximately) maximum hypervolume.
        """

        best_actions = []
        best_value = float('-inf')

        for action in self.environment.action_space:

            # Q-set used when nothing has been stored for (state, action)
            fallback = {
                (0, ): IndexVector(index=0, vector=self.initial_q_value)
            }

            stored = self.q.get(state, dict()).get(action, fallback)

            # Only the plain vectors are needed to calc the hypervolume
            vectors = [entry.vector for entry in stored.values()]

            value = uh.calc_hypervolume(vectors=vectors,
                                        reference=self.hv_reference)

            if math.isclose(value, best_value):
                # Tie with the best value so far: one more candidate
                best_actions.append(action)
            elif value > best_value:
                # Strictly better: previous candidates are discarded
                best_actions = [action]

            # The best value never decreases
            if value > best_value:
                best_value = value

        # Ties are broken randomly
        return self.generator.choice(best_actions)
示例#9
0
    def update_graph(self, graph_type: GraphType):
        """
        Append the current value to the data of every state tracked for the
        given graph type.

        The value is the hypervolume of `self.pareto_frontier_found` with
        respect to `self.hv_reference`, or `self.initial_q_value` while no
        frontier has been found. It does not depend on the state, so it is
        calculated once and shared by all of them.

        :param graph_type: key of `self.graph_info` to update.
        :return: None
        """

        tracked = self.graph_info[graph_type]

        # Nothing to extend: skip the hypervolume calculation altogether.
        if not tracked:
            return

        # NOTE(review): without a frontier the appended value is
        # `initial_q_value` itself, not a hypervolume -- confirm this is the
        # intended placeholder for the graph.
        if not self.pareto_frontier_found:
            value = self.initial_q_value
        else:
            value = uh.calc_hypervolume(vectors=self.pareto_frontier_found,
                                        reference=self.hv_reference)

        for state, data in tracked.items():
            # Add to graph train_data
            data.append(value)

            # Store it back; a no-op for a plain dict, kept in case the
            # container hands out copies.
            tracked[state] = data
示例#10
0
    def hypervolume_evaluation(self, state: object) -> int:
        """
        Choose the action whose Q-set has maximum hypervolume in the given
        state. (EvaluationMechanism.HV)

        Actions whose hypervolume is close (`math.isclose`) to the best one seen
        so far are treated as ties, and one of the tied actions is returned
        using `self.generator.choice`.

        :param state: state whose actions are evaluated.
        :return: one of the actions with (approximately) maximum hypervolume.
        """

        candidates = []
        top_value = float('-inf')

        for action in self.environment.action_space:

            # Hypervolume of Q_set(state, action) with respect to the reference
            value = uh.calc_hypervolume(
                vectors=self.q_set(state=state, action=action),
                reference=self.hv_reference)

            if math.isclose(value, top_value):
                # Tie with the best value so far: one more candidate
                candidates.append(action)
            elif value > top_value:
                # Strictly better: start a new list of candidates
                candidates = [action]

            # The best value never decreases
            if value > top_value:
                top_value = value

        # Ties are broken randomly
        return self.generator.choice(candidates)
示例#11
0
    def has_converged(self, v_k: dict, v_k_1: dict, tolerance: float) -> bool:
        """
        Check if a policy has converged, comparing state by state the
        hypervolume of V_k(state) against the hypervolume of V_k_1(state).

        With `self.convergence_graph` set, every state is measured, the largest
        difference is appended to `self.convergence_graph_data` and convergence
        means that this largest difference is below the tolerance (lengths are
        not compared in this mode).

        Otherwise the check stops at the first state whose two vector
        collections have different lengths, or whose hypervolume difference is
        not below the tolerance. An empty `v_k` is reported as not converged.

        :param v_k: dictionary state -> vectors.
        :param v_k_1: dictionary state -> vectors, with (at least) the keys of
            `v_k`.
        :param tolerance: upper bound (exclusive) for the absolute hypervolume
            difference.
        :return: True if converged, False otherwise.
        """

        def hypervolume_gap(vectors_k, vectors_k_1):
            # |HV(V_k_1(state)) - HV(V_k(state))|
            hv_k = uh.calc_hypervolume(
                vectors=vectors_k, reference=self.environment.hv_reference)
            hv_k_1 = uh.calc_hypervolume(
                vectors=vectors_k_1, reference=self.environment.hv_reference)
            return abs(hv_k_1 - hv_k)

        if self.convergence_graph:
            # Difference of every state, whatever the result is
            gaps = [
                hypervolume_gap(vectors_k, v_k_1[state])
                for state, vectors_k in v_k.items()
            ]

            largest_gap = max(gaps)
            converged = largest_gap < tolerance
            self.convergence_graph_data.append(largest_gap)

            return converged

        # Stays False when there is nothing to compare
        converged = False

        for state, vectors_k in v_k.items():
            vectors_k_1 = v_k_1[state]

            # Different lengths: no need to calc any hypervolume
            if len(vectors_k) != len(vectors_k_1):
                return False

            converged = hypervolume_gap(vectors_k, vectors_k_1) < tolerance

            # One state out of tolerance is enough to discard convergence
            if not converged:
                break

        return converged