def pendulum_traj_draw(traj, ax=None):
    '''
    draw a trajectory as a curve of its first column against its second

    traj is converted with np.array and indexed as traj_array[:, 0] and
    traj_array[:, 1], so it must be convertible to a 2-D array with at
    least two columns. The first sample is highlighted with a larger star
    marker and utils.add_arrow_to_line2D is called on the plotted curve.

    ax: axes to draw on; when None a new figure with a single subplot is
        created.

    returns the axes that were drawn on.
    '''
    plt.ion()
    if ax is None:
        fig = plt.figure()
        ax = fig.add_subplot(111)

    traj_array = np.array(traj)

    # Axes.hold was deprecated in matplotlib 2.0 and removed in 3.0 (artists
    # always accumulate there), so only call it where it still exists.
    if hasattr(ax, 'hold'):
        ax.hold(True)

    line = ax.plot(traj_array[:, 0], traj_array[:, 1], '-k')
    # add larger marker for the initial point
    ax.plot(traj_array[0, 0], traj_array[0, 1], '*k', markersize=10.0)
    # add arrow to curve
    utils.add_arrow_to_line2D(ax, line)

    return ax
def PendulumMDPValueLearningErrorDraw(err_lst_full, M, m_itrs):
    '''
    plot Log-likelihood for test data performance

    One curve is drawn per entry of m_itrs: for index m_idx the mean and
    standard deviation of err_lst_full[m_idx] along axis 1 are handed to
    utils.draw_err_bar_with_filled_shape together with M as the abscissa.
    Colors are taken cyclically from a fixed five-entry palette.

    returns the axes of the newly created figure.

    NOTE: a second definition with this same name appears later in this
    file; at import time that later definition replaces this one.
    '''
    fig = plt.figure()
    ax = fig.add_subplot(111)

    # Axes.hold was deprecated in matplotlib 2.0 and removed in 3.0; calling
    # it unconditionally raises AttributeError on current releases.
    if hasattr(ax, 'hold'):
        ax.hold(True)

    colors = [
        (1.0, 0.0, 0.0),
        (0.5, 0.5, 0.0),
        (0.0, 1.0, 0.0),
        (0.0, 0.5, 0.5),
        (0.0, 0.0, 1.0),
    ]

    lines = []
    legend_txt = []
    for m_idx, m_itr in enumerate(m_itrs):
        err = err_lst_full[m_idx]
        line, _ = utils.draw_err_bar_with_filled_shape(
            ax, M,
            np.mean(err, axis=1),
            np.std(err, axis=1),
            colors[m_idx % len(colors)])
        lines.append(line)
        legend_txt.append('Max Iterations: {0}'.format(m_itr))

    # prepare legend, axis text...
    plt.legend(lines, legend_txt, loc='best')
    ax.yaxis.grid()
    ax.xaxis.grid()
    ax.set_xlabel('Model size - M', fontsize=20)
    ax.set_ylabel('Log-likelihood', fontsize=20)
    ax.set_title('Log-likelihood of Test Data versus Model Size', fontsize=20)

    plt.draw()
    return ax
def PendulumMDPValueLearningErrorDraw(err_lst_full, M, m_itrs):
    '''
    plot Log-likelihood for test data performance

    err_lst_full: indexable by the position of each entry in m_itrs; every
                  item is reduced with np.mean / np.std along axis 1.
    M:            passed through to utils.draw_err_bar_with_filled_shape as
                  the x values (labelled 'Model size - M').
    m_itrs:       iteration limits, used only for the legend text.

    A new figure is created on every call and its axes are returned.

    NOTE: an earlier definition with this same name exists in this file;
    being the later one, this definition is the binding that remains.
    '''
    fig = plt.figure()
    ax = fig.add_subplot(111)

    # hold() no longer exists on Axes since matplotlib 3.0 (deprecated in
    # 2.0); keep the call only for old installations that still provide it.
    if hasattr(ax, 'hold'):
        ax.hold(True)

    colors = [(1.0, 0.0, 0.0), (0.5, 0.5, 0.0), (0.0, 1.0, 0.0),
              (0.0, 0.5, 0.5), (0.0, 0.0, 1.0)]
    num_colors = len(colors)

    lines = []
    legend_txt = []
    for m_idx, m_itr in enumerate(m_itrs):
        mean_err = np.mean(err_lst_full[m_idx], axis=1)
        std_err = np.std(err_lst_full[m_idx], axis=1)
        line, _ = utils.draw_err_bar_with_filled_shape(
            ax, M, mean_err, std_err, colors[m_idx % num_colors])
        lines.append(line)
        legend_txt.append('Max Iterations: {0}'.format(m_itr))

    # prepare legend, axis text...
    plt.legend(lines, legend_txt, loc='best')
    ax.yaxis.grid()
    ax.xaxis.grid()
    ax.set_xlabel('Model size - M', fontsize=20)
    ax.set_ylabel('Log-likelihood', fontsize=20)
    ax.set_title('Log-likelihood of Test Data versus Model Size', fontsize=20)

    plt.draw()
    return ax
def DiscretizeSystem(self, sys, costfunc, xbins, ubins, options=None):
    '''
    discretize a dynamical system on a grid of states and actions

    Fills in, on self:
        xbins_, ubins_          per-dimension bin lists
        state_dim_, ctrl_dim    number of state / control dimensions
        dt_, wrap_flag_         values taken from options (see below)
        S_, A_                  flattened meshgrids, one row per dimension
        num_state_, num_action_ number of columns of S_ / A_
        T_                      list (one per action) of dense
                                num_state_ x num_state_ transition matrices
        C_                      num_state_ x num_action_ cost table

    sys:      object exposing is_ct_ and Dynamics(x, u); dt_ is also read
              when is_ct_ is true.
              - is_ct_ true: Dynamics returns either the new state, or a
                list whose first two items are the expected new state and
                a per-dimension sigma of the same length.
              - is_ct_ false: Dynamics returns an iterable of items whose
                [0] is a new state and [1] the value stored in T_ for it.
    costfunc: called as costfunc(x, u, sys); scaled by sys.dt_ when
              is_ct_ is true.
    xbins:    bins of one state dimension, or a list with one entry per
              dimension.
    ubins:    same as xbins, for the controls.
    options:  optional dict. 'dt' (default 1.0) is stored as dt_.
              'wrap_flag' holds one flag per state dimension; flagged
              dimensions are wrapped into [first bin, last bin) before
              being mapped to the grid. Default: no wrapping.

    returns None. If the Gaussian branch sees mean and sigma vectors of
    different length, a message is printed and the method returns early,
    leaving T_ and C_ only partially filled.
    '''
    if options is None:
        options = {}

    # check system
    is_ct = sys.is_ct_

    # check dimension: a non-list argument is the single-dimension case
    self.xbins_ = xbins if isinstance(xbins, list) else [xbins]
    self.ubins_ = ubins if isinstance(ubins, list) else [ubins]

    self.state_dim_ = len(self.xbins_)
    self.ctrl_dim = len(self.ubins_)

    self.dt_ = options['dt'] if 'dt' in options else 1.0

    if 'wrap_flag' in options:
        self.wrap_flag_ = options['wrap_flag']
    else:
        self.wrap_flag_ = False * np.ones(self.state_dim_)

    # Convert before testing: comparing a plain Python list with True gives a
    # single False, which would silently disable wrapping.
    wrap_idx = np.flatnonzero(np.asarray(self.wrap_flag_, dtype=bool))

    xmin = np.array([bins[0] for bins in self.xbins_])
    xmax = np.array([bins[-1] for bins in self.xbins_])

    # construct grids; each meshgrid output is flattened to a long 1-d row
    # state
    Sgrid = np.meshgrid(*self.xbins_)
    self.S_ = np.array([np.ravel(dim) for dim in Sgrid])
    # action
    Agrid = np.meshgrid(*self.ubins_)
    self.A_ = np.array([np.ravel(dim) for dim in Agrid])

    self.num_state_ = self.S_.shape[1]
    self.num_action_ = self.A_.shape[1]

    # prepare the transition matrix (dense, one per action)
    self.T_ = [
        np.zeros([self.num_state_, self.num_state_])
        for _ in range(self.num_action_)
    ]
    # prepare cost function
    self.C_ = np.zeros([self.num_state_, self.num_action_])

    # helpers mapping a state to per-dimension grid indices and those
    # indices to a column of the flattened S_
    sub2ind = self.MakeSub2Ind(self.xbins_)
    xdigitize, xdigitize_dim = self.MakeXDigitize(self.xbins_)

    # call form prints the same text under Python 2 and Python 3
    print('Constructing transition matrix...')
    # vectorize this to increase the efficiency if possible...
    for action_idx in range(self.num_action_):
        trans = self.T_[action_idx]
        for state_idx in range(self.num_state_):
            if is_ct:
                # the system must be an update equation
                x_new = sys.Dynamics(self.S_[:, state_idx],
                                     self.A_[:, action_idx])
                self.C_[state_idx, action_idx] = sys.dt_ * costfunc(
                    self.S_[:, state_idx], self.A_[:, action_idx], sys)

                if isinstance(x_new, list):
                    # contains both expected state and diagonal Gaussian
                    # noise...
                    x_new_mu = x_new[0]
                    x_new_sig = x_new[1]
                    if len(x_new_mu) != len(x_new_sig):
                        print('Inconsistent length of state and noise vector...')
                        return

                    # wrap x_new if needed, this is useful for state
                    # variable like angular position
                    x_new_mu[wrap_idx] = np.mod(
                        x_new_mu[wrap_idx] - xmin[wrap_idx],
                        xmax[wrap_idx] - xmin[wrap_idx]) + xmin[wrap_idx]

                    x_new_mu_idx = xdigitize(x_new_mu)
                    x_new_mu_digitized_state = self.S_[:, sub2ind(x_new_mu_idx)]

                    coeff_lst = []
                    involved_states = []
                    # NOTE(review): cells are collected one axis at a time, so
                    # a cell lying on several axes' ranges (presumably the
                    # center) is appended more than once when
                    # state_dim_ > 1 -- confirm this weighting is intended.
                    for dim_idx in range(len(x_new_mu)):
                        neighbor_idx = list(x_new_mu_idx)
                        # for each dim, try to crawl the grid over the
                        # interval [-2*sigma, 2*sigma] around the mean
                        # how to wrap here? or just truncate the shape of
                        # gaussian?...
                        x_new_mu_tmp_min = np.array(x_new_mu)
                        x_new_mu_tmp_max = np.array(x_new_mu)
                        x_new_mu_tmp_min[dim_idx] += -2 * x_new_sig[dim_idx]
                        x_new_mu_tmp_max[dim_idx] += 2 * x_new_sig[dim_idx]
                        min_idx = xdigitize_dim(x_new_mu_tmp_min, dim_idx)
                        max_idx = xdigitize_dim(x_new_mu_tmp_max, dim_idx)

                        for step_idx in range(min_idx, max_idx + 1):
                            neighbor_idx[dim_idx] = step_idx
                            # get the index of involved state
                            involved_state_idx = sub2ind(neighbor_idx)
                            involved_states.append(involved_state_idx)
                            # unnormalized weight from the sigma-scaled
                            # distance to the digitized mean
                            scaled = (self.S_[:, involved_state_idx]
                                      - x_new_mu_digitized_state) / x_new_sig
                            coeff_lst.append(
                                np.exp(-np.linalg.norm(scaled) ** 2))

                    weights = np.asarray(coeff_lst) / np.sum(coeff_lst)
                    # assign transition probability for each state
                    for coeff, involved_state_idx in zip(weights.tolist(),
                                                         involved_states):
                        trans[state_idx, involved_state_idx] += coeff
                else:
                    # only updated state is available, need to map it to
                    # the grid
                    # wrap x_new if needed, this is useful for state
                    # variable like angular position
                    x_new[wrap_idx] = np.mod(
                        x_new[wrap_idx] - xmin[wrap_idx],
                        xmax[wrap_idx] - xmin[wrap_idx]) + xmin[wrap_idx]
                    # barycentric interpolation...
                    indices, coeffs = utils.BarycentricInterpolation(
                        self.xbins_, np.array([x_new]))
                    for target_idx, coeff in zip(indices[0], coeffs[0]):
                        trans[state_idx, target_idx] = coeff
            else:
                # discrete state dynamical system...
                # for discrete state dynamics, take the direct returned
                # states and associated probability
                x_new_lst = sys.Dynamics(self.S_[:, state_idx],
                                         self.A_[:, action_idx])
                self.C_[state_idx, action_idx] = costfunc(
                    self.S_[:, state_idx], self.A_[:, action_idx], sys)
                for x_new in x_new_lst:
                    # get index of x_new
                    state_new_idx = sub2ind(xdigitize(x_new[0]))
                    trans[state_idx, state_new_idx] = x_new[1]