Пример #1
0
    def convex_hull_search_better(self, prob, iteraion_number):
        """
        Grow a frontier of nodes for a fixed number of rounds and return the explored values.

        The frontier is seeded with one node per objective: for every i in range(self.d) the
        i-th unit vector is installed with ``m.set_Lambda`` and ``m.policy_iteration()`` is run.
        Each round expands every frontier node with ``node.expand(problem=prob)``, appends all
        children to the frontier and prunes it with the index list returned by
        ``self.frontier_convex_hull``.

        Side effect: the values explored right after seeding are written as one line to
        "observe-search.txt" (the file is overwritten on every call).

        :param prob: problem object forwarded unchanged to ``node.expand``
        :param iteraion_number: number of expansion rounds; with 0 the seeded result is returned
        :return: list of the values held by the object returned from ``self.fill_explored``
                 for the final frontier
        """
        m = self.m

        # Seed the frontier: one node per unit weight vector.
        frontier = utils.my_data_struc()
        for i in range(self.d):
            m.set_Lambda(np.array([1 if j == i else 0 for j in xrange(self.d)]))
            Uvec_n_d = m.policy_iteration()
            v_d = m.initial_states_distribution().dot(Uvec_n_d)
            n = Problem.Node([self.produce_policy(m.best_policy(Uvec_n_d)), v_d, Uvec_n_d])
            frontier.append(n)

        index_list = self.frontier_convex_hull(frontier)
        frontier.update(index_list)

        explored = self.fill_explored(frontier.A)

        # Only this one line is ever logged, so the file is opened just for the write;
        # the context manager guarantees it is flushed and closed even if the search fails later.
        with open("observe-search" + ".txt", "w") as obs:
            print >> obs, 'explored members', [val for val in explored.itervalues()]

        for _ in range(iteraion_number):
            # Children are collected separately because frontier.A is being iterated;
            # they are merged into the frontier only after the pass is complete.
            frontier_addition = utils.my_data_struc()
            for node in frontier.A:
                for child in node.expand(problem=prob):
                    frontier_addition.append(child)

            for node in frontier_addition.A:
                frontier.append(node)

            index_list = self.frontier_convex_hull(frontier)
            frontier.update(index_list)
            explored = self.fill_explored(frontier.A)

        return [val for val in explored.itervalues()]
Пример #2
0
    def update_convex_hull_epsilon(self, P_initial, frontier, hull_vertices, problem):
        """
        Run one expansion round over the frontier and rebuild the convex hull.

        Every node in ``frontier.A`` is expanded. A child is kept only when
        ``self.check_epsilon(child.state[1], P_initial, self.epsilon_error)`` is falsy. Kept
        children are stacked below the rows of P_initial, appended to the frontier and given
        fresh indices at the end of hull_vertices before ``self.make_convex_hull`` is called.

        :param P_initial: matrix of d-dimensional rows
        :param frontier: queue of type my_data_struc with all nodes for extension; appended to
                         and updated in place
        :param hull_vertices: list of indices; extended in place before the hull is rebuilt
        :param problem: problem object forwarded to ``node.expand``
        :return: triple (P_initial, frontier, hull_vertices) where the first and last entries
                 are elements 0 and 1 of the ``make_convex_hull`` result
        """
        accepted = utils.my_data_struc()
        stacked = P_initial

        for parent in frontier.A:
            for candidate in parent.expand(problem=problem):
                # Candidates are always tested against the points of the incoming hull,
                # not against candidates accepted earlier in this same pass.
                if self.check_epsilon(candidate.state[1], P_initial, self.epsilon_error):
                    continue
                accepted.append(candidate)
                stacked = np.vstack([stacked, candidate.state[1]])

        first_new_index = len(hull_vertices)
        for offset, extra in enumerate(accepted.A):
            frontier.append(extra)
            hull_vertices.append(first_new_index + offset)

        hull = self.make_convex_hull(stacked, hull_vertices)
        P_initial = hull[0]
        hull_vertices = hull[1]

        # Indices are shifted down by one and negative results are dropped.
        # NOTE(review): presumably row 0 of the point matrix is the zero vector, so row k
        # corresponds to frontier entry k-1 -- confirm against the caller.
        shifted = [vertex - 1 for vertex in hull_vertices]
        frontier.update([position for position in shifted if position >= 0])

        return (P_initial, frontier, hull_vertices)
Пример #3
0
    def convex_hull_search(self, prob):
        """
        this function gets a problem as tree of nodes each node is pair of policy and V_bar as ({0:2, 1:0, 2:1, 3:2, 4:1}, [0.1,0.25])
        and try to propagate all v_bar using extending node in each iteration and take their vertices of the optimal convex hull.
        :param problem: tree of nodes each node is pair of policy and V_bar as ({0:2, 1:0, 2:1, 3:2, 4:1}, [0.1,0.25])
        :return: returns set of approximated non-dominated v_bar vectors: vectors of dimension d
        """

        P_initial= np.zeros(shape=(1, self.d))
        m = self.m
        frontier = utils.my_data_struc()

        for i in range(self.d):
            m.set_Lambda(np.array([1 if j==i else 0 for j in xrange(self.d)]))
            Uvec_n_d = m.policy_iteration()
            v_d = m.initial_states_distribution().dot(Uvec_n_d)
            n = Problem.Node([self.produce_policy(m.best_policy(Uvec_n_d)), v_d, Uvec_n_d])
            frontier.append(n)

        for item in range(self.d):
            P_initial = np.vstack([P_initial, frontier.A[item].state[1]])

        hull_vertices = range(self.d + 1)

        #make convex hull of points *******************************
        temp_convex = self.make_convex_hull(P_initial, hull_vertices)
        P_initial = temp_convex[0]
        hull_vertices = temp_convex[1]
        #************************************************************
        frontier.update([item-1 for item in hull_vertices if item-1 >= 0])

        temp = self.update_convex_hull_epsilon(P_initial, frontier, hull_vertices, prob)
        P_new = temp[0]
        frontier = temp[1]
        hull_vertices = temp[2]

        #while P_initial and P_new are not equal
        while not (np.array_equal(P_initial, P_new)):
        #for i in range(1000):
            P_initial = P_new
            temp = self.update_convex_hull_epsilon(P_initial, frontier, hull_vertices, prob)
            P_new = temp[0]
            frontier = temp[1]
            hull_vertices = temp[2]

        print 'P_new', P_new
        return [val for val in P_new[1:]]
Пример #4
0
    def update_convex_hull_epsilon(self, frontier, problem):
        """
        Expand the frontier once and rebuild the convex hull from old and new vectors.

        The current point matrix is rebuilt from the frontier itself: a zero row followed by
        ``state[1]`` of every frontier entry. Each node of ``frontier.A`` is expanded, and a
        child is kept only when ``self.check_epsilon(child.state[1], points, self.epsilon_error)``
        is falsy for the rebuilt point matrix. Kept children are appended to the frontier and
        their vectors are added as extra rows before ``self.make_convex_hull`` is called.

        :param frontier: queue of type my_data_struc with all nodes for extension; appended to
                         and updated in place
        :param problem: problem object forwarded to ``node.expand``
        :return: triple (P_initial, frontier, hull_vertices) where the first and last entries
                 are elements 0 and 1 of the ``make_convex_hull`` result
        """
        accepted = utils.my_data_struc()

        # Point matrix of the current hull: zero row first, then one row per frontier entry.
        rows = [np.zeros(shape=(1, self.d))]
        rows.extend(frontier.A[i].state[1] for i in range(len(frontier)))
        P_new = np.vstack(rows)

        # Snapshot used for the epsilon test, so candidates are compared with the incoming
        # hull only and not with candidates accepted earlier in this pass.
        P_initial = copy.copy(P_new)

        # TODO: the frontier may no longer be useful in the new method.
        for parent in frontier.A:
            for candidate in parent.expand(problem=problem):
                if self.check_epsilon(candidate.state[1], P_initial, self.epsilon_error):
                    continue
                accepted.append(candidate)
                P_new = np.vstack([P_new, candidate.state[1]])

        old_size = len(frontier)
        # NOTE(review): P_new has old_size + 1 rows before any child is added (because of
        # the zero row) while only old_size indices are generated here -- confirm that
        # make_convex_hull expects this.
        hull_vertices = range(old_size)
        for offset, extra in enumerate(accepted.A):
            frontier.append(extra)
            hull_vertices.append(old_size + offset)

        hull = self.make_convex_hull(P_new, hull_vertices)
        P_initial = hull[0]
        hull_vertices = hull[1]

        # Indices are shifted down by one (row 0 is the zero vector) and negatives dropped.
        shifted = [vertex - 1 for vertex in hull_vertices]
        frontier.update([position for position in shifted if position >= 0])

        return (P_initial, frontier, hull_vertices)
Пример #5
0
    def convex_hull_search_experimental(self, prob, random_lambdas, exact, aver_lambda):
        """
        this function gets a problem as tree of nodes each node is pair of policy, V_bar and Uvec matrix as
        ({0:2, 1:0, 2:1, 3:2, 4:1}, [0.1,0.25], [[1.00, 0.30]
                                                 [0.50, 0.60]
                                                 [0.78, 0.43]
                                                 [1.40, 3.11]])
        and tries to propagate all v_bar using extending node in each iteration and take their vertices of the optimal convex hull.
        :param problem: tree of nodes each node is pair of policy and V_bar as ({0:2, 1:0, 2:1, 3:2, 4:1}, [0.1,0.25])
        :return: returns set of approximated non-dominated v_bar vectors: vectors of dimension d
        """

        """
        an array initialized by [0, ..,0] vector of dimension d, P_initial keeps
        the vertices of optimal convex hull after any iteration
        """
        P_initial= np.zeros(shape=(1, self.d))
        m = self.m
        """we use frontier to keep each required vertice of convex hull inside it. this structure contains only the vectors."""
        frontier = utils.my_data_struc()

        '''
        make initial v_bars using d vectors in which each vector of the form [0,..,0,1,0,..,0] and saves three required
        information : [Policy, v_bar, Uvec_n_d] as a Node structure. This list is assumed as a state for the graph.
        '''
        for i in range(self.d):
            m.set_Lambda(np.array([1 if j==i else 0 for j in xrange(self.d)]))
            Uvec_n_d = m.value_iteration(epsilon=0.00001)
            v_d = m.initial_states_distribution().dot(Uvec_n_d)
            n = Problem.Node([self.produce_policy(m.best_policy(Uvec_n_d)), v_d, Uvec_n_d])
            frontier.append(n)

        """add v-bar vectors related to [0,..,0,1,0,..,0] identical lambda vetors for initializing the optimal \mathcal{V}
        polytope"""
        for item in range(self.d):
                P_initial = np.vstack([P_initial, frontier.A[item].state[1]])

        #hull_vertices = range(P_initial.shape[0])
        #************************************************************

        """removes unused nodes from our graph. it means if related vector of Node is not considered in the convex hull
        update function will remove it from frontier"""
        temp = self.update_convex_hull_epsilon(frontier, prob)
        P_new = temp[0]
        frontier = temp[1]

        res = open("check" + ".txt", "w")

        #lists for saving error vs |V| length
        errors = []
        queries = []
        vector_length = []

        iteration = 0
        #while not(self.IsEqual(P_initial, P_new)):
        for i in range(20):#(250):
            if not(self.IsEqual(P_initial, P_new)):
                P_initial = P_new
                temp = self.update_convex_hull_epsilon(frontier, prob)
                P_new = temp[0]
                frontier = temp[1]
                iteration += 1

                #to see error changes vs size of generated Vs
                vectors = [val for val in P_new[1:] if not all(v == 0.0 for v in val)]
                print >> res, "********** iteration", i, "******************"
                res.flush()

                queries_ave = []
                errors_ave= []
                res.flush()

                for j in range(aver_lambda):
                    index = i*aver_lambda+j

                    V = V_bar_search.V_bar_search(_mdp= self.m, _V_bar=vectors, lam_random = random_lambdas[index])
                    temp = V.v_optimal(_random_lambda_number = 1000)

                    v_opt = temp[0]
                    queries_ave.append(temp[1])
                    errors_ave.append(np.dot(random_lambdas[index], v_opt) - np.dot(random_lambdas[index], exact[index]))


                errors.append(np.abs(np.average(errors_ave)))
                queries.append(np.average(queries_ave))
                vector_length.append(len(vectors))

                print >> res, "errors_ave", errors
                print >> res, "vector length", vector_length
                print >> res, "asked queries", queries
                res.flush()


            else:
                print '*******final results **********', (vector_length, errors, queries, iteration)
                return (vector_length, errors, queries, iteration)
            if i % 10 == 0:
                print i,"=", P_new.shape[0]

        #print 'iteration', iteration
        print '*******final results **********', (vector_length, errors, queries, iteration)
        return (vector_length, errors, queries, iteration)
Пример #6
0
    def convex_hull_search(self, prob):
        """
        this function gets a problem as tree of nodes each node is pair of policy, V_bar and Uvec matrix as
        ({0:2, 1:0, 2:1, 3:2, 4:1}, [0.1,0.25], [[1.00, 0.30]
                                                 [0.50, 0.60]
                                                 [0.78, 0.43]
                                                 [1.40, 3.11]])
        and tries to propagate all v_bar using extending node in each iteration and take their vertices of the optimal convex hull.
        :param problem: tree of nodes each node is pair of policy and V_bar as ({0:2, 1:0, 2:1, 3:2, 4:1}, [0.1,0.25])
        :return: returns set of approximated non-dominated v_bar vectors: vectors of dimension d
        """

        """
        an array initialized by [0, ..,0] vector of dimension d, P_initial keeps
        the vertices of optimal convex hull after any iteration
        """
        P_initial= np.zeros(shape=(1, self.d))
        m = self.m
        """we use frontier to keep each required vertice of convex hull inside it. this structure contains only the vectors."""
        frontier = utils.my_data_struc()

        '''
        make initial v_bars using d vectors in which each vector of the form [0,..,0,1,0,..,0] and saves three required
        information : [Policy, v_bar, Uvec_n_d] as a Node structure. This list is assumed as a state for the graph.
        '''
        for i in range(self.d):
            m.set_Lambda(np.array([1 if j==i else 0 for j in xrange(self.d)]))
            Uvec_n_d = m.value_iteration(epsilon=0.00001)
            v_d = m.initial_states_distribution().dot(Uvec_n_d)
            n = Problem.Node([self.produce_policy(m.best_policy(Uvec_n_d)), v_d, Uvec_n_d])
            frontier.append(n)

        """add v-bar vectors related to [0,..,0,1,0,..,0] identical lambda vetors for initializing the optimal \mathcal{V}
        polytope"""
        for item in range(self.d):
                P_initial = np.vstack([P_initial, frontier.A[item].state[1]])

        #hull_vertices = range(P_initial.shape[0])
        #************************************************************

        """removes unused nodes from our graph. it means if related vector of Node is not considered in the convex hull
        update function will remove it from frontier"""
        temp = self.update_convex_hull_epsilon(frontier, prob)
        P_new = temp[0]
        frontier = temp[1]

        iteration = 0
        #while not(self.IsEqual(P_initial, P_new)):
        for i in range(250):
            if not(self.IsEqual(P_initial, P_new)):
                P_initial = P_new
                temp = self.update_convex_hull_epsilon(frontier, prob)
                P_new = temp[0]
                frontier = temp[1]
                iteration += 1
            else:
                return ([val for val in P_new[1:] if not all(v == 0.0 for v in val)], iteration)
            if i % 10 == 0:
                print i,"=", P_new.shape[0]

        #print 'iteration', iteration
        return ([val for val in P_new[1:] if not all(v == 0.0 for v in val)], iteration)