def convex_hull_search_better(self, prob, iteraion_number):
    """
    Run a fixed number of frontier-expansion rounds and return the explored
    values.

    The frontier is seeded with one node per objective: for each of the
    ``self.d`` unit weight vectors the model ``self.m`` is solved with
    ``policy_iteration()`` and the result is wrapped in a ``Problem.Node``
    holding ``[policy, v_d, Uvec_n_d]``. After seeding, and again after every
    round, the frontier is reduced to the indices returned by
    ``self.frontier_convex_hull(frontier)``. One round expands every frontier
    node with ``node.expand(problem=prob)`` and appends all children to the
    frontier before that reduction.

    Side effect: "observe-search.txt" is (re)created in the current working
    directory and receives the explored values of the seed frontier.

    :param prob: problem object forwarded to ``node.expand(problem=...)``
    :param iteraion_number: number of expansion rounds to run (the spelling is
        part of the signature and is therefore kept)
    :return: list of the values of the object returned by
        ``self.fill_explored(frontier.A)`` for the final frontier
    """
    m = self.m

    # The log file used to be opened and never closed. The context manager
    # releases the handle on every exit path, including exceptions.
    with open("observe-search" + ".txt", "w") as obs:
        frontier = utils.my_data_struc()

        for i in range(self.d):
            # Weight vector that selects objective i only.
            m.set_Lambda(np.array([1 if j == i else 0 for j in xrange(self.d)]))
            Uvec_n_d = m.policy_iteration()
            v_d = m.initial_states_distribution().dot(Uvec_n_d)
            frontier.append(
                Problem.Node([self.produce_policy(m.best_policy(Uvec_n_d)), v_d, Uvec_n_d])
            )

        frontier.update(self.frontier_convex_hull(frontier))
        explored = self.fill_explored(frontier.A)

        print >> obs, 'explored members', list(explored.itervalues())
        # This is the only write to the log, so one flush makes it visible
        # on disk while the (potentially long) rounds below are running.
        obs.flush()

        for _ in range(iteraion_number):
            # Children are gathered in a separate container first, presumably
            # so that frontier.A is not modified while it is being iterated.
            frontier_addition = utils.my_data_struc()
            for node in frontier.A:
                for child in node.expand(problem=prob):
                    frontier_addition.append(child)

            for node in frontier_addition.A:
                frontier.append(node)

            frontier.update(self.frontier_convex_hull(frontier))
            explored = self.fill_explored(frontier.A)

    return list(explored.itervalues())
def update_convex_hull_epsilon(self, P_initial, frontier, hull_vertices, problem):
    """
    Expand every frontier node once and rebuild the convex hull.

    Every child returned by ``node.expand(problem=problem)`` is tested with
    ``self.check_epsilon(child.state[1], P_initial, self.epsilon_error)``.
    A child is kept only when that call returns a falsy value. Kept children
    are appended to ``frontier``, their ``state[1]`` vectors are stacked below
    the rows of ``P_initial``, and one new index per kept child is appended to
    ``hull_vertices``. The stacked matrix and the index list are then handed
    to ``self.make_convex_hull``.

    NOTE(review): this file defines ``update_convex_hull_epsilon`` a second
    time further down with the signature ``(self, frontier, problem)``. If
    both definitions live in the same class the later one replaces this one -
    confirm which is intended.

    :param P_initial: array of current vertices, one vertex per row
    :param frontier: container read through ``.A``, grown with ``append()``
        and pruned with ``update()``; it is modified in place
    :param hull_vertices: list of row indices; it is extended in place
    :param problem: object forwarded to ``node.expand(problem=...)``
    :return: tuple ``(P_initial, frontier, hull_vertices)`` whose first and
        last items are elements 0 and 1 of the ``make_convex_hull`` result
    """
    accepted = utils.my_data_struc()
    stacked = P_initial

    for parent in frontier.A:
        for candidate in parent.expand(problem=problem):
            # The test is always made against the vertices we started with,
            # not against the rows added during this call.
            if self.check_epsilon(candidate.state[1], P_initial, self.epsilon_error):
                continue
            accepted.append(candidate)
            stacked = np.vstack([stacked, candidate.state[1]])

    # New rows were appended after the existing ones, so their indices
    # continue from the current length of the index list.
    first_new_index = len(hull_vertices)
    for offset, candidate in enumerate(accepted.A):
        frontier.append(candidate)
        hull_vertices.append(first_new_index + offset)

    hull = self.make_convex_hull(stacked, hull_vertices)
    P_initial, hull_vertices = hull[0], hull[1]

    # Hull indices are shifted down by one before being applied to the
    # frontier; index 0 has no frontier counterpart and is dropped.
    frontier.update([index - 1 for index in hull_vertices if index >= 1])

    return (P_initial, frontier, hull_vertices)
def convex_hull_search(self, prob): """ this function gets a problem as tree of nodes each node is pair of policy and V_bar as ({0:2, 1:0, 2:1, 3:2, 4:1}, [0.1,0.25]) and try to propagate all v_bar using extending node in each iteration and take their vertices of the optimal convex hull. :param problem: tree of nodes each node is pair of policy and V_bar as ({0:2, 1:0, 2:1, 3:2, 4:1}, [0.1,0.25]) :return: returns set of approximated non-dominated v_bar vectors: vectors of dimension d """ P_initial= np.zeros(shape=(1, self.d)) m = self.m frontier = utils.my_data_struc() for i in range(self.d): m.set_Lambda(np.array([1 if j==i else 0 for j in xrange(self.d)])) Uvec_n_d = m.policy_iteration() v_d = m.initial_states_distribution().dot(Uvec_n_d) n = Problem.Node([self.produce_policy(m.best_policy(Uvec_n_d)), v_d, Uvec_n_d]) frontier.append(n) for item in range(self.d): P_initial = np.vstack([P_initial, frontier.A[item].state[1]]) hull_vertices = range(self.d + 1) #make convex hull of points ******************************* temp_convex = self.make_convex_hull(P_initial, hull_vertices) P_initial = temp_convex[0] hull_vertices = temp_convex[1] #************************************************************ frontier.update([item-1 for item in hull_vertices if item-1 >= 0]) temp = self.update_convex_hull_epsilon(P_initial, frontier, hull_vertices, prob) P_new = temp[0] frontier = temp[1] hull_vertices = temp[2] #while P_initial and P_new are not equal while not (np.array_equal(P_initial, P_new)): #for i in range(1000): P_initial = P_new temp = self.update_convex_hull_epsilon(P_initial, frontier, hull_vertices, prob) P_new = temp[0] frontier = temp[1] hull_vertices = temp[2] print 'P_new', P_new return [val for val in P_new[1:]]
def update_convex_hull_epsilon(self, frontier, problem):
    """
    Expand every frontier node once and rebuild the convex hull.

    The current vertex matrix is rebuilt from the frontier: an all-zero row
    followed by ``state[1]`` of every frontier node. Every child returned by
    ``node.expand(problem=problem)`` is tested with
    ``self.check_epsilon(child.state[1], <that matrix>, self.epsilon_error)``
    and kept only when the call returns a falsy value. Kept children are
    appended to ``frontier`` and their vectors are stacked below the matrix.
    The result is handed to ``self.make_convex_hull`` together with an index
    list.

    NOTE(review): the stacked matrix has one more row (the leading zero row)
    than the index list has entries; the four-argument variant of this method
    earlier in the file starts from an index list that also covers the zero
    row - confirm this difference is intended.

    :param frontier: container read through ``.A`` and ``len()``, grown with
        ``append()`` and pruned with ``update()``; it is modified in place
    :param problem: object forwarded to ``node.expand(problem=...)``
    :return: tuple ``(P_initial, frontier, hull_vertices)`` whose first and
        last items are elements 0 and 1 of the ``make_convex_hull`` result
    """
    # Row 0 is the zero vector, followed by the vector of every frontier node.
    P_new = np.zeros(shape=(1, self.d))
    for position in range(len(frontier)):
        P_new = np.vstack([P_new, frontier.A[position].state[1]])

    # Snapshot used for all epsilon tests of this call.
    P_initial = copy.copy(P_new)

    # TODO: the original author notes that the frontier may no longer be
    # needed with the newer method.
    accepted = utils.my_data_struc()
    for parent in frontier.A:
        for candidate in parent.expand(problem=problem):
            if self.check_epsilon(candidate.state[1], P_initial, self.epsilon_error):
                continue
            accepted.append(candidate)
            P_new = np.vstack([P_new, candidate.state[1]])

    # From here on P_new also contains every vector accepted above.
    first_new_index = len(frontier)
    hull_vertices = list(range(first_new_index))
    for offset, candidate in enumerate(accepted.A):
        frontier.append(candidate)
        hull_vertices.append(first_new_index + offset)

    hull = self.make_convex_hull(P_new, hull_vertices)
    P_initial, hull_vertices = hull[0], hull[1]

    # Hull indices are shifted down by one before being applied to the
    # frontier; index 0 is dropped.
    frontier.update([index - 1 for index in hull_vertices if index >= 1])

    return (P_initial, frontier, hull_vertices)
def convex_hull_search_experimental(self, prob, random_lambdas, exact, aver_lambda): """ this function gets a problem as tree of nodes each node is pair of policy, V_bar and Uvec matrix as ({0:2, 1:0, 2:1, 3:2, 4:1}, [0.1,0.25], [[1.00, 0.30] [0.50, 0.60] [0.78, 0.43] [1.40, 3.11]]) and tries to propagate all v_bar using extending node in each iteration and take their vertices of the optimal convex hull. :param problem: tree of nodes each node is pair of policy and V_bar as ({0:2, 1:0, 2:1, 3:2, 4:1}, [0.1,0.25]) :return: returns set of approximated non-dominated v_bar vectors: vectors of dimension d """ """ an array initialized by [0, ..,0] vector of dimension d, P_initial keeps the vertices of optimal convex hull after any iteration """ P_initial= np.zeros(shape=(1, self.d)) m = self.m """we use frontier to keep each required vertice of convex hull inside it. this structure contains only the vectors.""" frontier = utils.my_data_struc() ''' make initial v_bars using d vectors in which each vector of the form [0,..,0,1,0,..,0] and saves three required information : [Policy, v_bar, Uvec_n_d] as a Node structure. This list is assumed as a state for the graph. ''' for i in range(self.d): m.set_Lambda(np.array([1 if j==i else 0 for j in xrange(self.d)])) Uvec_n_d = m.value_iteration(epsilon=0.00001) v_d = m.initial_states_distribution().dot(Uvec_n_d) n = Problem.Node([self.produce_policy(m.best_policy(Uvec_n_d)), v_d, Uvec_n_d]) frontier.append(n) """add v-bar vectors related to [0,..,0,1,0,..,0] identical lambda vetors for initializing the optimal \mathcal{V} polytope""" for item in range(self.d): P_initial = np.vstack([P_initial, frontier.A[item].state[1]]) #hull_vertices = range(P_initial.shape[0]) #************************************************************ """removes unused nodes from our graph. 
it means if related vector of Node is not considered in the convex hull update function will remove it from frontier""" temp = self.update_convex_hull_epsilon(frontier, prob) P_new = temp[0] frontier = temp[1] res = open("check" + ".txt", "w") #lists for saving error vs |V| length errors = [] queries = [] vector_length = [] iteration = 0 #while not(self.IsEqual(P_initial, P_new)): for i in range(20):#(250): if not(self.IsEqual(P_initial, P_new)): P_initial = P_new temp = self.update_convex_hull_epsilon(frontier, prob) P_new = temp[0] frontier = temp[1] iteration += 1 #to see error changes vs size of generated Vs vectors = [val for val in P_new[1:] if not all(v == 0.0 for v in val)] print >> res, "********** iteration", i, "******************" res.flush() queries_ave = [] errors_ave= [] res.flush() for j in range(aver_lambda): index = i*aver_lambda+j V = V_bar_search.V_bar_search(_mdp= self.m, _V_bar=vectors, lam_random = random_lambdas[index]) temp = V.v_optimal(_random_lambda_number = 1000) v_opt = temp[0] queries_ave.append(temp[1]) errors_ave.append(np.dot(random_lambdas[index], v_opt) - np.dot(random_lambdas[index], exact[index])) errors.append(np.abs(np.average(errors_ave))) queries.append(np.average(queries_ave)) vector_length.append(len(vectors)) print >> res, "errors_ave", errors print >> res, "vector length", vector_length print >> res, "asked queries", queries res.flush() else: print '*******final results **********', (vector_length, errors, queries, iteration) return (vector_length, errors, queries, iteration) if i % 10 == 0: print i,"=", P_new.shape[0] #print 'iteration', iteration print '*******final results **********', (vector_length, errors, queries, iteration) return (vector_length, errors, queries, iteration)
def convex_hull_search(self, prob): """ this function gets a problem as tree of nodes each node is pair of policy, V_bar and Uvec matrix as ({0:2, 1:0, 2:1, 3:2, 4:1}, [0.1,0.25], [[1.00, 0.30] [0.50, 0.60] [0.78, 0.43] [1.40, 3.11]]) and tries to propagate all v_bar using extending node in each iteration and take their vertices of the optimal convex hull. :param problem: tree of nodes each node is pair of policy and V_bar as ({0:2, 1:0, 2:1, 3:2, 4:1}, [0.1,0.25]) :return: returns set of approximated non-dominated v_bar vectors: vectors of dimension d """ """ an array initialized by [0, ..,0] vector of dimension d, P_initial keeps the vertices of optimal convex hull after any iteration """ P_initial= np.zeros(shape=(1, self.d)) m = self.m """we use frontier to keep each required vertice of convex hull inside it. this structure contains only the vectors.""" frontier = utils.my_data_struc() ''' make initial v_bars using d vectors in which each vector of the form [0,..,0,1,0,..,0] and saves three required information : [Policy, v_bar, Uvec_n_d] as a Node structure. This list is assumed as a state for the graph. ''' for i in range(self.d): m.set_Lambda(np.array([1 if j==i else 0 for j in xrange(self.d)])) Uvec_n_d = m.value_iteration(epsilon=0.00001) v_d = m.initial_states_distribution().dot(Uvec_n_d) n = Problem.Node([self.produce_policy(m.best_policy(Uvec_n_d)), v_d, Uvec_n_d]) frontier.append(n) """add v-bar vectors related to [0,..,0,1,0,..,0] identical lambda vetors for initializing the optimal \mathcal{V} polytope""" for item in range(self.d): P_initial = np.vstack([P_initial, frontier.A[item].state[1]]) #hull_vertices = range(P_initial.shape[0]) #************************************************************ """removes unused nodes from our graph. 
it means if related vector of Node is not considered in the convex hull update function will remove it from frontier""" temp = self.update_convex_hull_epsilon(frontier, prob) P_new = temp[0] frontier = temp[1] iteration = 0 #while not(self.IsEqual(P_initial, P_new)): for i in range(250): if not(self.IsEqual(P_initial, P_new)): P_initial = P_new temp = self.update_convex_hull_epsilon(frontier, prob) P_new = temp[0] frontier = temp[1] iteration += 1 else: return ([val for val in P_new[1:] if not all(v == 0.0 for v in val)], iteration) if i % 10 == 0: print i,"=", P_new.shape[0] #print 'iteration', iteration return ([val for val in P_new[1:] if not all(v == 0.0 for v in val)], iteration)