def decodeCoordinate(self, memory=None, dim=1, return_list=False, suppress_value=None, decode_range=None):
    """Decode the peak coordinate(s) stored in a memory trace.

    The memory is normalized (``helpers.normalize``), reverse-permuted,
    reshaped to ``dim`` dimensions and smoothed. Maxima are then read off
    repeatedly for as long as the maximum exceeds
    ``self.peak_min_ratio * abs(mean(memory)) + self.peak_min``; each peak
    found is converted into ``decode_range`` with ``helpers.reverse_scale``
    and subtracted from the memory with ``deductValue`` before the next
    search.

    Args:
        memory: Trace to decode; defaults to ``self.memory``.
        dim: Number of coordinate dimensions (1, 2 or 3).
        return_list: If exactly ``False``, return as soon as the first peak
            is found; otherwise collect every peak above the threshold.
        suppress_value: Optional value(s) to subtract from the memory before
            searching for peaks.
        decode_range: One range per dimension; defaults to
            ``self.valid_range``.

    Returns:
        With ``return_list`` False: the first decoded coordinate (a list of
        ``dim`` values). With ``return_list`` True: a list of
        ``(coordinate, 1)`` tuples. If no peak passes the threshold the
        result is ``np.nan`` / ``[(np.nan, 1)]`` when ``suppress_value`` was
        given, and an empty list otherwise.

    Raises:
        ValueError: If neither ``decode_range`` nor ``self.valid_range`` is
            set.
        AssertionError: If ``dim`` is not 1, 2 or 3, or ``decode_range`` does
            not have ``dim`` entries.

    Side effects:
        If ``self.peak_min`` is 0 it is set to half the maximum of the
        smoothed memory, and that value persists on the instance.
    """
    assert dim in (1, 2, 3)

    if memory is None:
        memory = self.memory
    memory = helpers.normalize(memory)

    if decode_range is None:
        decode_range = self.valid_range
    if decode_range is None:
        raise ValueError("Decoding scalar values requires valid range (valid_range or decode_range parameter)")
    assert len(decode_range) == dim

    memory = self.reverse_permute(memory)
    if self.visualize:
        print("Output Reverse:")
        self.plot(np.reshape(memory, self.size))

    memory = helpers.smooth(helpers.reShape(memory, dim), self.window_ratio)
    side = helpers.sideLength(memory.size, dim)
    if self.visualize:
        print("Output Smooth pre:")
        self.plot(np.reshape(memory, self.size))

    if suppress_value is not None:
        # Fixed: the original referenced the misspelled name `supress_value`
        # (NameError). `dim` is forwarded so the compensation array is
        # reshaped like `memory`, matching the deduction call in the loop.
        memory = self.deductValue(memory, suppress_value, HRR.valid_range, dim)
        if self.visualize:
            print("Output Smooth (after suppression):")
            self.plot(np.reshape(memory, self.size))

    result = []
    if self.peak_min == 0:
        # Lazily derive the absolute threshold from the first decoded memory.
        self.peak_min = np.max(memory) / 2

    while np.max(memory) > self.peak_min_ratio * abs(np.mean(memory)) + self.peak_min:
        spot = list(np.unravel_index(np.argmax(memory), memory.shape))
        for axis in range(dim):
            spot[axis] = helpers.reverse_scale(spot[axis], side, decode_range[axis])
        result.append((spot, 1))
        if return_list is False:
            return spot

        # Remove the peak just found so the next iteration sees the next one.
        # NOTE(review): the deduction encodes `spot` against HRR.valid_range
        # although `spot` was scaled into `decode_range` — confirm these
        # always agree.
        memory = self.deductValue(memory, spot, HRR.valid_range, dim, np.max(memory))
        if self.visualize:
            print("Output Post Deduction:")
            self.plot(np.reshape(memory, self.size))

    if len(result) == 0 and suppress_value is not None:
        return [(np.nan, 1)] if return_list else np.nan
    return result
def fit(self, X, num_iter=100, lr=1e-5, char_to_ix=None, dino_names=10):
    """Train the model on the examples in ``X``, one example per iteration.

    Each iteration encodes one example as character indices, runs
    ``forward``, computes the loss, back-propagates, clips the gradients to
    ``maxValue=5`` and applies ``update_parameters``. Every 2000 iterations
    the smoothed loss and ``dino_names`` sampled sequences are printed.

    Args:
        X: Indexable collection of training strings; iteration ``j`` uses
            ``X[j % len(X)]``.
        num_iter: Number of training iterations.
        lr: Learning rate passed to ``update_parameters``.
        char_to_ix: Mapping from character to index. Required, and it must
            contain the newline character used as the end-of-sequence target.
        dino_names: Number of samples printed at each report; also passed to
            ``get_initial_loss``.

    Returns:
        The trained parameters.

    Raises:
        TypeError: If ``char_to_ix`` is not provided.
    """
    if char_to_ix is None:
        raise TypeError("fit() requires a char_to_ix mapping")

    # Fixed: the original read `vocab_size` and `ix_to_char`, which are never
    # defined in this method; both follow directly from `char_to_ix`.
    vocab_size = len(char_to_ix)
    ix_to_char = {ix: ch for ch, ix in char_to_ix.items()}

    parameters = self.initialize_parameters()
    loss = get_initial_loss(vocab_size, dino_names)
    # NOTE(review): a_prev stays all-zero for every iteration; the second
    # value returned by backpropagation() is never fed back — confirm that
    # this is intended.
    a_prev = np.zeros((self.n_a, 1))

    for j in range(num_iter):
        # Fixed: the original indexed with len(examples), a name that is not
        # the `X` parameter.
        example = X[j % len(X)]
        x = [None] + [char_to_ix[ch] for ch in example]
        # Targets are the inputs shifted by one, terminated by a newline.
        y = x[1:] + [char_to_ix['\n']]

        cache = self.forward(x, a_prev, parameters)
        curr_loss = self.calculate_lost(y, cache)
        gradients, _ = self.backpropagation(x, y, parameters, cache)
        gradients = self.clip(gradients, maxValue=5)
        parameters = self.update_parameters(lr, parameters, gradients)
        loss = smooth(loss, curr_loss)

        if j % 2000 == 0:
            print('Iteration: %d, Loss: %f' % (j, loss) + '\n')
            for _ in range(dino_names):
                sampled_indices = self.sample(parameters, char_to_ix)
                print_sample(sampled_indices, ix_to_char)
            print('\n')

    return parameters
def plot(self, vect=None, unpermute=False, smooth=False, wide=False, multidim=False):
    """Plot a vector as a line graph or as a 3-D surface and show the figure.

    Args:
        vect: Data to plot; defaults to ``self.memory``.
        unpermute: If true, apply ``self.reverse_permute`` first.
        smooth: If true, apply ``helpers.smooth`` first.
        wide: If true, pad the axis limits on every side by a fraction
            (``widening``) of the data length and of the value span.
        multidim: If true, draw a 3-D surface (a 1-D input is first reshaped
            with ``helpers.reShape(vect, 2)``); otherwise draw a line plot.

    Raises:
        AssertionError: If ``multidim`` is set and ``vect`` has three or more
            dimensions.
    """
    if vect is None:
        vect = self.memory
    if unpermute:
        vect = self.reverse_permute(vect)
    if smooth:
        vect = helpers.smooth(vect)

    fig = plt.figure()

    if wide:
        # NOTE(review): `widening` is not defined inside this method; it is
        # presumably a module-level constant — confirm.
        widen = len(vect) * widening
        down = np.amin(vect)
        up = np.amax(vect)
        span = up - down  # value range, used to pad the vertical limits
        down -= span * widening
        up += span * widening
        plt.axis([-widen, len(vect) + widen, down, up])

    if multidim:
        assert len(vect.shape) < 3
        if len(vect.shape) == 1:
            vect = helpers.reShape(vect, 2)
        X = np.arange(-len(vect) / 2, len(vect) / 2, 1)
        Y = np.arange(-len(vect[0]) / 2, len(vect[0]) / 2, 1)
        X, Y = np.meshgrid(X, Y)
        # Fixed: Figure.gca() no longer accepts keyword arguments in current
        # Matplotlib; add_subplot() is the supported way to get 3-D axes.
        ax = fig.add_subplot(111, projection='3d')
        surf = ax.plot_surface(X, Y, vect, rstride=1, cstride=1, cmap='coolwarm',
                               linewidth=0, antialiased=True)
        ax.set_zlim(np.min(vect) / 3, 1.1 * np.max(vect))
        ax.set_xlabel('Y Index')
        ax.set_ylabel('X Index')
        ax.set_zlabel('Encoded Value')
        ax.set_xlim3d(-len(vect) / 2, len(vect) / 2)
        ax.set_ylim3d(-len(vect[0]) / 2, len(vect[0]) / 2)
        ax.azim = 200
        fig.colorbar(surf, shrink=0.5, aspect=5)
    else:
        plt.plot(range(len(vect)), vect)

    #fig.savefig('temp.png', transparent=True)
    plt.show()
def deductValue(self, memory, value, input_range, dim=1, height=1):
    """Subtract the smoothed encoding of ``value`` from ``memory`` in place.

    ``value`` is wrapped in a list unless it is already a frozenset, list,
    ndarray, set or tuple. It is encoded with ``self.coordinate_encoder``,
    reshaped to ``dim`` dimensions, smoothed with ``self.window_ratio`` and
    scaled by ``-height`` before being added to ``memory``.

    Args:
        memory: Array to modify; updated with ``+=`` and also returned.
        value: Value or collection of values to suppress.
        input_range: Range passed to the coordinate encoder; must not be None.
        dim: Dimensionality used when reshaping the encoding.
        height: Factor applied (negated) to the encoding.

    Returns:
        ``memory`` after the deduction.
    """
    assert input_range is not None

    is_collection = isinstance(value, (frozenset, list, np.ndarray, set, tuple))
    values = value if is_collection else [value]

    # Reshape and smooth exactly like the memory so the two arrays line up.
    bump = helpers.smooth(
        helpers.reShape(self.coordinate_encoder(values, input_range), dim),
        self.window_ratio,
    )
    bump[:] = [entry * -height for entry in bump]

    if self.visualize:
        print("Output Supressed Value:")
        self.plot(np.reshape(bump, self.size))

    memory += bump
    return memory
n_ep=args.n_ep, n_mcts=args.n_mcts, max_ep_len=args.max_ep_len, lr=args.lr, c=args.c, gamma=args.gamma, data_size=args.data_size, batch_size=args.batch_size, temp=args.temp, n_hidden_layers=args.n_hidden_layers, n_hidden_units=args.n_hidden_units) # Finished training: Visualize fig, ax = plt.subplots(1, figsize=[7, 5]) total_eps = len(episode_returns) episode_returns = smooth(episode_returns, args.window, mode='valid') ax.plot(symmetric_remove(np.arange(total_eps), args.window - 1), episode_returns, linewidth=4, color='darkred') ax.set_ylabel('Return') ax.set_xlabel('Episode', color='darkred') plt.savefig(os.getcwd() + '/learning_curve.png', bbox_inches="tight", dpi=300) # print('Showing best episode with return {}'.format(R_best)) # Env = make_game(args.game) # Env = wrappers.Monitor(Env,os.getcwd() + '/best_episode',force=True) # Env.reset() # Env.seed(seed_best)
discount_factor=discount_factor, lr=1e-3, epsilon=epsilon, memory=memory, experience_replay=experience_replay, true_gradient=true_gradient, batch_size=batch_size) else: dynaQ = TabularDynaQ(env, planning_steps=n, discount_factor=discount_factor, lr=learning_rate, epsilon=epsilon, deterministic=False) dynaQ.learn_policy(1000) # plot results plt.plot(smooth(dynaQ.episode_lengths, 10)) plt.title('Episode lengths Deep Dyna-Q (nongreedy)') # NB: lengths == returns plt.show() dynaQ.test_model_greedy(100) # plot results plt.plot(smooth(dynaQ.episode_lengths, 10)) print("Average episode length (greedy): {}".format( np.mean(np.array(dynaQ.episode_lengths)))) plt.title('Episode lengths Deep Dyna-Q (greedy)') # NB: lengths == returns plt.show()
def process_data(self):
    """Post-process the captured floor, body and root-transform data.

    Steps, in order:
      1. Smooth the first four floor-data columns (window 13, 'hanning').
      2. Rotate every body by the floor tilt and collect its root transform
         into ``self._Tdata``.
      3. Smooth root-transform columns 2..4, then translate and rotate every
         body by its root transform.
      4. Fill ``self._joint_p`` / ``self._joint_v``.
      5. Compute per-frame root velocities (``_transx_v``, ``_transz_v``,
         ``_angy_v``) and rotate the x/z velocity by the facing angle.
      6. Generate contact labels, phase labels and trajectory samples.
    """
    # smooth floor data
    Fdata_t = np.transpose(self._Fdata)
    for col in range(4):
        Fdata_t[col] = smooth(Fdata_t[col], 13, 'hanning')
    self._Fdata = np.transpose(Fdata_t)

    # apply floor transform
    for i in range(len(self._Bdata)):
        # NOTE(review): raises ZeroDivisionError if self._Fdata[i][2] is 0.
        xrot = atan(self._Fdata[i][3] / self._Fdata[i][2])
        R = rotation_matrix(xrot, 0.0, 0.0)
        self._Bdata[i] = rotate_body(R, self._Bdata[i])
        root = get_root_transform(self._Bdata[i])
        if self._Tdata is None:
            self._Tdata = np.array([root])
        else:
            self._Tdata = np.append(self._Tdata, [root], axis=0)

    # NOTE(review): columns 2..4 are smoothed here while the rotation below
    # reads columns 3..5 — confirm the intended column range.
    Tdata_t = np.transpose(self._Tdata)
    for col in range(2, 5):
        Tdata_t[col] = smooth(Tdata_t[col], 13, 'hanning')
    self._Tdata = np.transpose(Tdata_t)

    # Fixed: _frame_count is an int (it sizes the arrays below), so the
    # original range(len(self._frame_count)) raised TypeError.
    for i in range(self._frame_count):
        self._Bdata[i] = translate_body(self._Tdata[i][0], self._Tdata[i][1], self._Bdata[i])
        R = rotation_matrix(self._Tdata[i][3], self._Tdata[i][4], self._Tdata[i][5])
        self._Bdata[i] = rotate_body(R, self._Bdata[i])

    self._joint_p = np.zeros((self._frame_count, PyKinectV2.JointType_Count * 3))
    self._joint_v = np.zeros((self._frame_count, PyKinectV2.JointType_Count * 3))

    # NOTE(review): the two loops below index ROWS i*3+k and up to
    # _frame_count*3 of arrays that only have _frame_count rows, so they go
    # out of bounds. The intended layout is presumably one row per frame with
    # (x, y, z) per joint in the columns, but the per-joint structure of
    # self._Bdata[i] is not visible here — left unchanged, fix once confirmed.
    for i in range(self._frame_count):
        self._joint_p[i * 3 + 0] = self._Bdata[i].Position.x
        self._joint_p[i * 3 + 1] = self._Bdata[i].Position.y
        self._joint_p[i * 3 + 2] = self._Bdata[i].Position.z
    for i in range(3, self._frame_count * 3):
        self._joint_v[i] = self._joint_p[i] - self._joint_p[i - 3]

    self._transx_v = np.zeros(self._frame_count)
    self._transz_v = np.zeros(self._frame_count)
    self._angy_v = np.zeros(self._frame_count)
    for i in range(1, self._frame_count):
        self._transx_v[i] = self._Tdata[i][0] - self._Tdata[i - 1][0]
        self._transz_v[i] = self._Tdata[i][2] - self._Tdata[i - 1][2]
        alpha = atan2(self._Tdata[i][5], self._Tdata[i][3])
        beta = atan2(self._Tdata[i - 1][5], self._Tdata[i - 1][3])
        # Fixed: the original assigned the scalar to self._angy_v itself,
        # replacing the whole per-frame array on every iteration.
        self._angy_v[i] = alpha - beta

    for i in range(self._frame_count):
        # calculate the angle between the facing direction and the z axis
        theta = atan2(self._Tdata[i][5], self._Tdata[i][3])
        # Rotate the x and z velocity by that angle.
        # Fixed: the original computed cos(theta * v) / sin(theta * v) rather
        # than cos(theta) * v / sin(theta) * v, and used the already-rotated
        # x component when computing z.
        vx = self._transx_v[i]
        vz = self._transz_v[i]
        self._transx_v[i] = cos(theta) * vx - sin(theta) * vz
        self._transz_v[i] = sin(theta) * vx + cos(theta) * vz

    #TODO: pass over all joints again and interpolate any missing points
    self._gen_contact_labels()
    self._gen_phase_labels()

    self._traj_px = np.zeros((self._frame_count, self._sample_count))
    self._traj_pz = np.zeros((self._frame_count, self._sample_count))
    self._traj_dx = np.zeros((self._frame_count, self._sample_count))
    self._traj_dz = np.zeros((self._frame_count, self._sample_count))
    for i in range(self._frame_count):
        frame_samples = self._sample_frames(i)
        self._gen_trajectory(i, frame_samples)