def reset(self):
    """Reset the base approximator state and start over with a new LWPR model."""
    FA.reset(self)

    # initialize the LWPR function; it is attached to the instance first and
    # configured afterwards through a local alias
    n_in = self.indim
    model = self.lwpr = LWPR(n_in, self.outdim)
    model.init_D = 10. * np.eye(n_in)
    model.init_alpha = 0.1 * np.ones([n_in, n_in])
    model.meta = True
class LWPRFA(FA):
    """Function approximator that delegates to an LWPR model.

    The LWPR model is kept out of the pickled state: ``__getstate__`` writes
    it to a binary file below ``.lwprmodels`` and ``__setstate__`` reads it
    back from the path stored in ``self.filename``.
    """

    parametric = False

    def __init__(self, indim, outdim):
        FA.__init__(self, indim, outdim)
        # path of the model file written by the most recent __getstate__ call
        self.filename = None

    def reset(self):
        """Reset the base class and create a fresh LWPR model."""
        FA.reset(self)

        # initialize the LWPR function
        self.lwpr = LWPR(self.indim, self.outdim)
        self.lwpr.init_D = 10. * np.eye(self.indim)
        self.lwpr.init_alpha = 0.1 * np.ones([self.indim, self.indim])
        self.lwpr.meta = True

    def predict(self, inp):
        """ predict the output for the given input. """
        # Copying the input into a freshly allocated array fixes a bug when
        # lwpr models are pickled and unpickled again. Without it, a TypeError
        # is thrown "Expected a double precision numpy array." even though the
        # numpy array is double precision.
        inp = self._asFlatArray(inp)
        inp_tmp = np.zeros(inp.shape)
        inp_tmp[:] = inp
        return self.lwpr.predict(inp_tmp)

    def train(self):
        """Update the LWPR model once with every (input, target) pair of the dataset."""
        for i, t in self.dataset:
            i = self._asFlatArray(i)
            t = self._asFlatArray(t)
            self.lwpr.update(i, t)

    def _cleanup(self):
        """Delete the model file written by the last pickling, if it still exists."""
        if self.filename and os.path.exists(self.filename):
            os.remove(self.filename)

    def __getstate__(self):
        """ required for pickle. removes the lwpr model from the dictionary
            and saves it to file explicitly.
        """
        # create unique hash key for filename and write lwpr to file.
        # The seed text is encoded because hashlib only accepts bytes on
        # Python 3; on Python 2 the encode is a no-op for this ASCII text.
        seed = str(self.lwpr) + time.ctime() + str(np.random.random())
        hashkey = hashlib.sha1(seed.encode('utf-8')).hexdigest()[:8]

        if not os.path.exists('.lwprmodels'):
            os.makedirs('.lwprmodels')

        # remove any old file if existing. The existence check matters: the
        # file may already be gone (e.g. after _cleanup(), which leaves
        # self.filename set), and an unconditional os.remove would raise.
        if self.filename and os.path.exists(self.filename):
            os.remove(self.filename)

        self.filename = '.lwprmodels/lwpr_%s.binary'%hashkey
        self.lwpr.write_binary(self.filename)

        # remove lwpr from dictionary and return state
        state = self.__dict__.copy()
        del state['lwpr']
        return state

    def __setstate__(self, state):
        """ required for pickle. loads the stored lwpr model explicitly. """
        self.__dict__.update(state)
        self.lwpr = LWPR(self.filename)
def __setstate__(self, state):
    """Restore the pickled attributes, then reload the LWPR model.

    Required for pickle: the model itself is not part of ``state``; it is
    re-created from the file path held in ``self.filename``.
    """
    vars(self).update(state)
    model_path = self.filename
    self.lwpr = LWPR(model_path)
def test_lwpr_ref(datafile, resultfolder, model_epoch, hist_window, saveFigure=False, modelfile='lwpr_model', robot_type=None):
    """Roll the saved best left/right LWPR models forward on reference inputs.

    Reads ``dataset['reference_input']`` from ``datafile``, loads the two
    ``<modelfile>_best_{left,right}_epoch<model_epoch>.bin`` models from
    ``resultfolder`` and predicts 125 steps per input row, feeding earlier
    predictions back in as the speed history. The predictions, the command
    columns and the elapsed time are saved to
    ``<resultfolder>/ref_input_result_LWPR.mat``.

    Args:
        datafile: path of the .mat file holding ``reference_input``.
        resultfolder: folder (relative to the working directory) with the
            model files; results and plots are written there as well.
        model_epoch: epoch number embedded in the model file names.
        hist_window: ``(speed_hw, cmd_hw)`` history lengths; the model input
            has ``2*(speed_hw+cmd_hw)`` entries.
        saveFigure: when true, write one PNG per input row into
            ``RefResponsePlot_LWPR`` plus a combined PDF.
        modelfile: file-name prefix of the stored models.
        robot_type: 'Vulcan' divides plotted commands by 100; every other
            value plots them unscaled. When omitted, a module-level
            ``robot_type`` is used if one exists (the function used to read
            that global directly).
    """
    if robot_type is None:
        robot_type = globals().get('robot_type')
    # Always bound, so plotting cannot fail on an unrecognised robot type.
    plot_cmd_divider = 100.0 if robot_type == 'Vulcan' else 1.0

    # load data file
    dataset = loadmat(datafile)
    test_data_x = dataset['reference_input']
    num_test, test_dim = test_data_x.shape[0], 125
    speed_hw, cmd_hw = hist_window[0], hist_window[1]
    speed_len = 2*speed_hw
    input_dim = 2*(speed_hw+cmd_hw)
    # columns holding the commands that belong to the predicted steps
    cmd_cols = slice(input_dim-2, input_dim-2+2*test_dim)

    # load model files
    model_prefix = './' + resultfolder + '/' + modelfile
    best_left_model = LWPR(model_prefix + '_best_left_epoch' + str(model_epoch) + '.bin')
    print('Read Left model (%d)' % (best_left_model.num_rfs[0]))
    best_right_model = LWPR(model_prefix + '_best_right_epoch' + str(model_epoch) + '.bin')
    print('Read Right model (%d)' % (best_right_model.num_rfs[0]))

    # start making 5 seconds simulation
    pred_on_test = np.zeros((num_test, 2*test_dim))
    tmp_x = np.zeros((input_dim, 1))

    print('start prediction on reference input')
    start_test_time = timeit.default_timer()
    for data_cnt in range(num_test):
        for pred_cnt in range(test_dim):
            if pred_cnt < speed_hw:
                # speed history: remaining measured values first, then the
                # predictions made so far
                num_value_from_data = 2*(speed_hw-pred_cnt)
                tmp_x[0:num_value_from_data, 0] = test_data_x[data_cnt, 2*pred_cnt:speed_len]
                tmp_x[num_value_from_data:speed_len, 0] = pred_on_test[data_cnt, 0:2*pred_cnt]
            else:
                # speed history consists of predictions only
                tmp_x[0:speed_len, 0] = pred_on_test[data_cnt, 2*(pred_cnt-speed_hw):2*pred_cnt]
            tmp_x[speed_len:input_dim, 0] = test_data_x[data_cnt, 2*(speed_hw+pred_cnt):2*(speed_hw+pred_cnt+cmd_hw)]

            pred_on_test[data_cnt, 2*pred_cnt], _ = best_left_model.predict_conf(tmp_x)
            pred_on_test[data_cnt, 2*pred_cnt+1], _ = best_right_model.predict_conf(tmp_x)

        if data_cnt%1000 == 0:
            print('\t\t%d' % data_cnt)
    end_test_time = timeit.default_timer()
    print('finish prediction on reference input')

    save_file_name = './' + resultfolder + '/ref_input_result_LWPR.mat'
    result = {}
    result['test_time'] = end_test_time - start_test_time
    result['model_output_on_test_data'] = pred_on_test
    result['joystick_command'] = test_data_x[:, cmd_cols]
    savemat(save_file_name, result)

    # plot (lots of) graphs
    if saveFigure:
        # predictions and commands are stored interleaved (left, right, ...)
        pred_left_speed, pred_right_speed = pred_on_test[:, 0::2], pred_on_test[:, 1::2]
        commands = test_data_x[:, cmd_cols]
        forward_cmd, left_cmd = commands[:, 0::2], commands[:, 1::2]

        plot_path = os.getcwd() + '/' + resultfolder
        dir_path = plot_path + '/RefResponsePlot_LWPR'
        if not os.path.isdir(dir_path):
            os.mkdir(dir_path)

        plot_x = np.linspace(0, 5, num=test_dim, endpoint=True)
        pp = PdfPages(plot_path + '/LWPR_ref_response_plot.pdf')
        try:
            for cnt in range(num_test):
                plot_y1 = pred_left_speed[cnt, :]
                plot_y2 = pred_right_speed[cnt, :]
                plot_cmd1 = forward_cmd[cnt, :]/plot_cmd_divider
                plot_cmd2 = left_cmd[cnt, :]/plot_cmd_divider

                title1 = 'Left Wheel response to ref input ' + str(cnt)
                title2 = 'Right Wheel response to ref input ' + str(cnt)
                filename = dir_path + '/' + str(cnt) + '.png'

                # y-limits: data range padded by 0.1, never tighter than +-0.25
                ymax1 = max(np.amax(plot_y1)+0.1, np.amax(plot_cmd1)+0.1, 0.25)
                ymin1 = min(np.amin(plot_y1)-0.1, np.amin(plot_cmd1)-0.1, -0.25)
                ymax2 = max(np.amax(plot_y2)+0.1, np.amax(plot_cmd2)+0.1, 0.25)
                ymin2 = min(np.amin(plot_y2)-0.1, np.amin(plot_cmd2)-0.1, -0.25)

                fig1 = plt.figure()
                plt.subplot(211)
                plt.plot(plot_x, plot_y1, 'r--', label='Predicted Speed')
                plt.plot(plot_x, plot_cmd1, 'k--', label='Left Wheel Command')
                plt.ylim(ymin1, ymax1)
                plt.title(title1)

                plt.subplot(212)
                plt.plot(plot_x, plot_y2, 'r--', label='Predicted Speed')
                plt.plot(plot_x, plot_cmd2, 'k--', label='Right Wheel Command')
                plt.ylim(ymin2, ymax2)
                plt.title(title2)
                plt.legend(loc='center left', bbox_to_anchor=(1.0, 1.0))

                fig1.savefig(filename, bbox_inches='tight', pad_inches=0)
                pp.savefig()
                plt.close()
        finally:
            # finalize the PDF even if one of the plots fails
            pp.close()
def test_lwpr_5sec(datafile, resultfolder, model_epoch, hist_window, robot_type, cmd_scaler=1.0, modelfile='lwpr_model', saveFigure=False):
    """Evaluate the saved best left/right LWPR models on the test set.

    For every test row the models are rolled forward ``test_dim`` steps
    (``test_data_y.shape[1]//2``), feeding earlier predictions back in as the
    speed history. Mean absolute, root-mean-square-style and maximum errors
    are printed and saved, together with the predictions, to
    ``<resultfolder>/LWPR_1D_model_test_result.mat``.

    Args:
        datafile: .mat file with ``test_data_x`` and ``test_data_y``.
        resultfolder: folder with the model files and
            ``Result_of_training_epoch<model_epoch>.mat``; outputs are
            written there as well.
        model_epoch: epoch number embedded in the file names.
        hist_window: ``(speed_hw, cmd_hw)`` history lengths.
        robot_type: 'Vulcan' plots both commands (divided by 100) in each
            subplot; 'Fetch', 'Fetch2' and 'MagicBot' plot one command per
            wheel. Any other value plots no command curves.
        cmd_scaler: factor applied to the command columns of the input.
        modelfile: file-name prefix of the stored models.
        saveFigure: when true, write PNGs into ``ResultPlot`` plus a PDF for
            every 25th test row and for the row with the largest error.
    """
    per_wheel_cmd = robot_type in ('Fetch', 'Fetch2', 'MagicBot')
    # Always bound, so plotting cannot fail on an unrecognised robot type.
    plot_cmd_divider = 100.0 if robot_type == 'Vulcan' else 1.0

    # load data file
    dataset = loadmat(datafile)
    test_data_x, test_data_y = dataset['test_data_x'], dataset['test_data_y']
    num_test, test_dim = test_data_x.shape[0], test_data_y.shape[1]//2
    speed_hw, cmd_hw = hist_window[0], hist_window[1]
    speed_len = 2*speed_hw
    input_dim = 2*(speed_hw+cmd_hw)

    # normalize command part
    test_data_x[:, speed_len:] = test_data_x[:, speed_len:] * cmd_scaler

    # load model files
    model_prefix = './' + resultfolder + '/' + modelfile
    best_left_model = LWPR(model_prefix + '_best_left_epoch' + str(model_epoch) + '.bin')
    print('Read Left model (%d)' % (best_left_model.num_rfs[0]))
    best_right_model = LWPR(model_prefix + '_best_right_epoch' + str(model_epoch) + '.bin')
    print('Read Right model (%d)' % (best_right_model.num_rfs[0]))

    result_file_name = './' + resultfolder + '/Result_of_training_epoch' + str(model_epoch) + '.mat'
    result_mat = loadmat(result_file_name)
    train_time = result_mat['train_time']
    hist_valid_error = result_mat['history_validation_error']

    # start making 5 seconds simulation
    pred_on_test = np.zeros((num_test, 2*test_dim))
    tmp_x = np.zeros((input_dim, 1))

    print('start prediction on test data')
    start_test_time = timeit.default_timer()
    for data_cnt in range(num_test):
        for pred_cnt in range(test_dim):
            if pred_cnt < speed_hw:
                # speed history: remaining measured values first, then the
                # predictions made so far
                num_value_from_data = 2*(speed_hw-pred_cnt)
                tmp_x[0:num_value_from_data, 0] = test_data_x[data_cnt, 2*pred_cnt:speed_len]
                tmp_x[num_value_from_data:speed_len, 0] = pred_on_test[data_cnt, 0:2*pred_cnt]
            else:
                # speed history consists of predictions only
                tmp_x[0:speed_len, 0] = pred_on_test[data_cnt, 2*(pred_cnt-speed_hw):2*pred_cnt]
            tmp_x[speed_len:input_dim, 0] = test_data_x[data_cnt, 2*(speed_hw+pred_cnt):2*(speed_hw+pred_cnt+cmd_hw)]

            pred_on_test[data_cnt, 2*pred_cnt], _ = best_left_model.predict_conf(tmp_x)
            pred_on_test[data_cnt, 2*pred_cnt+1], _ = best_right_model.predict_conf(tmp_x)

        if data_cnt%5000 == 0:
            print('\t\t%d' % data_cnt)
    end_test_time = timeit.default_timer()

    diff = np.abs(test_data_y[0:num_test, 0:2*test_dim] - pred_on_test)
    # test row containing the single largest absolute error
    max_index = diff.argmax() // (test_dim*2)
    error = np.asarray([np.sum(diff)/float(num_test), np.sqrt(np.sum(diff**2)/float(num_test)), np.max(diff)])
    print('Error on Test Data! %f/%f/%f' % (error[0], error[1], error[2]))

    save_file_name = './' + resultfolder + '/LWPR_1D_model_test_result.mat'
    result = {}
    result['train_time'] = train_time
    result['test_time'] = end_test_time - start_test_time
    result['history_validation_error'] = hist_valid_error
    result['test_error'] = error
    result['model_output_on_test_data'] = pred_on_test
    # the saved command slice keeps its historical fixed width of 250 columns
    result['joystick_command'] = test_data_x[:, (input_dim-2):(input_dim-2+250)]
    savemat(save_file_name, result)

    # plot (lots of) graphs
    if saveFigure:
        # values are stored interleaved (left, right, left, right, ...);
        # strided views replace the former reshape to a hard-coded (125, 2)
        real_y_left, real_y_right = test_data_y[:, 0:2*test_dim:2], test_data_y[:, 1:2*test_dim:2]
        pred_left_speed, pred_right_speed = pred_on_test[:, 0::2], pred_on_test[:, 1::2]
        joystick = test_data_x[:, (input_dim-2):(input_dim-2+2*test_dim)]
        forward_cmd, left_cmd = joystick[:, 0::2], joystick[:, 1::2]

        plot_path = os.getcwd() + '/' + resultfolder
        dir_path = plot_path + '/ResultPlot'
        if not os.path.isdir(dir_path):
            os.mkdir(dir_path)

        # (label, row) pairs: every 25th row, then the worst row. Rows are
        # kept as integers instead of being recovered from label*25, which
        # for the fractional worst-row label could round to a neighbour.
        plots = [(cnt, cnt*25) for cnt in range(num_test // 25)]
        plots.append((max_index / 25.0, int(max_index)))
        # 125 samples were labelled as 5 seconds, i.e. 25 samples per unit
        duration = test_dim / 25.0

        pp = PdfPages(plot_path + '/LWPR_test_plot.pdf')
        try:
            for plot_cnt, row in plots:
                plot_x = np.linspace(plot_cnt, plot_cnt+duration, num=test_dim, endpoint=True)
                plot_y1 = pred_left_speed[row, :]
                plot_y2 = pred_right_speed[row, :]
                plot_real_y1 = real_y_left[row, :]
                plot_real_y2 = real_y_right[row, :]
                plot_for_cmd = forward_cmd[row, :]/plot_cmd_divider
                plot_left_cmd = left_cmd[row, :]/plot_cmd_divider

                title1 = 'Left Wheel 5sec Prediction from ' + str(plot_cnt)
                title2 = 'Right Wheel 5sec Prediction from ' + str(plot_cnt)
                filename = dir_path + '/5secLTS_' + str(plot_cnt) + '.png'

                fig1 = plt.figure()
                plt.subplot(211)
                plt.plot(plot_x, plot_y1, 'r--', label='Predicted Speed')
                plt.plot(plot_x, plot_real_y1, 'b-.', label='Encoded Speed')
                if robot_type == 'Vulcan':
                    plt.plot(plot_x, plot_for_cmd, 'k--', label='Forward Command')
                    plt.plot(plot_x, plot_left_cmd, 'k-.', label='Left Command')
                elif per_wheel_cmd:
                    plt.plot(plot_x, plot_for_cmd, 'k--', label='Left Command')
                plt.title(title1)

                plt.subplot(212)
                plt.plot(plot_x, plot_y2, 'r--', label='Predicted Speed')
                plt.plot(plot_x, plot_real_y2, 'b-.', label='Encoded Speed')
                if robot_type == 'Vulcan':
                    plt.plot(plot_x, plot_for_cmd, 'k--', label='Forward Command')
                    plt.plot(plot_x, plot_left_cmd, 'k-.', label='Left Command')
                elif per_wheel_cmd:
                    plt.plot(plot_x, plot_left_cmd, 'k--', label='Right Command')
                plt.title(title2)
                plt.legend(loc='center left', bbox_to_anchor=(1.0, 1.0))

                fig1.savefig(filename, bbox_inches='tight', pad_inches=0)
                pp.savefig()
                plt.close()
        finally:
            # finalize the PDF even if one of the plots fails
            pp.close()
def _new_lwpr_model(input_dim, cmd_hw, init_lwpr_setting):
    """Create one untrained single-output LWPR model from ``init_lwpr_setting``."""
    # diagonal weights: 1 for the speed part, init_lwpr_setting[5] for the
    # trailing 2*cmd_hw command entries
    diag_weights = np.ones(input_dim)
    diag_weights[input_dim-2*cmd_hw:input_dim] = init_lwpr_setting[5]

    model = LWPR(input_dim, 1)
    model.init_D = init_lwpr_setting[0] * np.diag(diag_weights)
    model.update_D = False
    model.init_alpha = init_lwpr_setting[1] * np.diag(diag_weights)
    model.penalty = init_lwpr_setting[2]
    model.meta = True
    model.meta_rate = 20
    model.w_gen = init_lwpr_setting[3]
    model.w_prune = init_lwpr_setting[4]
    return model


def train_lwpr(datafile, resultfolder, max_num_train, patience_list, improvement_threshold, init_lwpr_setting, hist_window, start_epoch=0, cmd_scaler=1.0, modelfile='lwpr_model'):
    """Train one LWPR model per wheel with patience-based early stopping.

    Each epoch presents the training rows in random order to both models,
    evaluates them on the validation set, and writes the models plus a
    ``Result_of_training_epoch<N>.mat`` summary into ``resultfolder``. An
    epoch whose second error measure beats
    ``best_model_error * improvement_threshold`` is additionally stored as
    ``<modelfile>_best_{left,right}_epoch<N>.bin`` and may extend patience.

    Args:
        datafile: .mat file with ``train_data_x/y`` and ``valid_data_x/y``.
        resultfolder: output folder; created when missing.
        max_num_train: upper bound (exclusive) on the epoch index.
        patience_list: ``[0]`` is the initial patience, ``[1]`` the factor
            applied to the epoch index when patience is extended.
        improvement_threshold: factor applied to the best error so far when
            deciding whether an epoch counts as an improvement.
        init_lwpr_setting: ``[0]`` init_D scale, ``[1]`` init_alpha scale,
            ``[2]`` penalty, ``[3]`` w_gen, ``[4]`` w_prune, ``[5]`` diagonal
            weight of the command part of the input.
        hist_window: ``(speed_hw, cmd_hw)`` history lengths.
        start_epoch: when >= 1, resume from the files of epoch
            ``start_epoch-1``.
        cmd_scaler: factor applied to the command columns of the inputs.
        modelfile: file-name prefix for stored models.

    Returns:
        The epoch index of the best model seen.
    """
    if os.path.isdir(resultfolder):
        print("subfolder exists")
    else:
        print("Not Exist, so make subfolder")
        os.makedirs(resultfolder)

    # Load Data
    dataset = loadmat(datafile)
    train_data_x, train_data_y = dataset['train_data_x'], dataset['train_data_y']
    valid_data_x, valid_data_y = dataset['valid_data_x'], dataset['valid_data_y']
    num_data, num_valid = train_data_x.shape[0], valid_data_x.shape[0]
    speed_hw, cmd_hw = hist_window[0], hist_window[1]
    input_dim = 2*(speed_hw+cmd_hw)

    # normalize command part
    train_data_x[:, 2*speed_hw:] = train_data_x[:, 2*speed_hw:] * cmd_scaler
    valid_data_x[:, 2*speed_hw:] = valid_data_x[:, 2*speed_hw:] * cmd_scaler

    # Set-up Parameters/Model for Training Procedure
    error_hist, best_model_error, prev_train_time = [], np.inf, 0
    initD, initA, penalty = init_lwpr_setting[0], init_lwpr_setting[1], init_lwpr_setting[2]
    w_gen, w_prune = init_lwpr_setting[3], init_lwpr_setting[4]
    best_model_epoch = 0
    model_prefix = './' + resultfolder + '/' + modelfile

    if start_epoch < 1:
        # Initialize Two 1-Dimensional Models
        LWPR_model_left = _new_lwpr_model(input_dim, cmd_hw, init_lwpr_setting)
        LWPR_model_right = _new_lwpr_model(input_dim, cmd_hw, init_lwpr_setting)
        patience = patience_list[0]
    else:
        LWPR_model_left = LWPR(model_prefix + '_left_epoch' + str(start_epoch-1) + '.bin')
        print('\tRead LWPR model for left wheel(%d)' % (LWPR_model_left.num_rfs[0]))
        LWPR_model_right = LWPR(model_prefix + '_right_epoch' + str(start_epoch-1) + '.bin')
        print('\tRead LWPR model for right wheel(%d)' % (LWPR_model_right.num_rfs[0]))

        result_file_name = './' + resultfolder + '/Result_of_training_epoch' + str(start_epoch-1) + '.mat'
        result_file = loadmat(result_file_name)
        # loadmat hands scalars back as 1x1 arrays; unwrap them so the
        # comparisons below and the values saved again stay plain scalars
        prev_train_time = np.asarray(result_file['train_time']).item()
        patience = np.asarray(result_file['patience']).item()
        best_model_error = np.asarray(result_file['best_model_error']).item()
        # only present in result files written by this version
        if 'best_model_epoch' in result_file:
            best_model_epoch = int(np.asarray(result_file['best_model_epoch']).item())
        history = result_file['history_validation_error']
        for cnt in range(start_epoch):
            error_hist.append([history[cnt][0], history[cnt][1], history[cnt][2]])

    # Training Part
    model_prediction = np.zeros(valid_data_y.shape)
    tmp_x, tmp_y = np.zeros((input_dim, 1)), np.zeros((1, 1))

    print('start training')
    start_train_time = timeit.default_timer()
    for train_cnt in range(start_epoch, max_num_train):
        if patience < train_cnt:
            break

        # one pass over the training data in random order
        for data_cnt, row in enumerate(np.random.permutation(num_data)):
            tmp_x[:, 0] = train_data_x[row, 0:input_dim]
            tmp_y[0, 0] = train_data_y[row, 0]
            LWPR_model_left.update(tmp_x, tmp_y)
            tmp_y[0, 0] = train_data_y[row, 1]
            LWPR_model_right.update(tmp_x, tmp_y)

            if data_cnt % 5000 == 0:
                print('\ttrain epoch %d, data index %d, #rfs=%d/%d' % (train_cnt, data_cnt, LWPR_model_left.num_rfs[0], LWPR_model_right.num_rfs[0]))

        # validation
        for data_cnt in range(num_valid):
            tmp_x[:, 0] = valid_data_x[data_cnt, 0:input_dim]
            model_prediction[data_cnt, 0], _ = LWPR_model_left.predict_conf(tmp_x)
            model_prediction[data_cnt, 1], _ = LWPR_model_right.predict_conf(tmp_x)

        diff = np.abs(valid_data_y - model_prediction)
        new_error = np.asarray([np.sum(diff)/float(num_valid), np.sqrt(np.sum(diff**2)/float(num_valid)), np.max(diff)])
        error_hist.append([new_error[0], new_error[1], new_error[2]])

        # save result of one training epoch
        LWPR_model_left.write_binary(model_prefix + '_left_epoch' + str(train_cnt) + '.bin')
        LWPR_model_right.write_binary(model_prefix + '_right_epoch' + str(train_cnt) + '.bin')

        if new_error[1] < best_model_error * improvement_threshold:
            best_model_epoch = train_cnt
            best_model_error = new_error[1]
            patience = max(patience, min(train_cnt+10, int(train_cnt * patience_list[1])))

            LWPR_model_left.write_binary(model_prefix + '_best_left_epoch' + str(train_cnt) + '.bin')
            LWPR_model_right.write_binary(model_prefix + '_best_right_epoch' + str(train_cnt) + '.bin')

        result_file_name = './' + resultfolder + '/Result_of_training_epoch' + str(train_cnt) + '.mat'
        result = {}
        result['train_time'] = timeit.default_timer() - start_train_time + prev_train_time
        result['best_model_error'] = best_model_error
        result['best_model_epoch'] = best_model_epoch
        result['history_validation_error'] = error_hist
        result['patience'] = patience
        result['improvement_threshold'] = improvement_threshold
        result['init_D'] = initD
        result['init_alpha'] = initA
        result['penalty'] = penalty
        result['w_generate_criterion'] = w_gen
        result['w_prune_criterion'] = w_prune
        result['number_speed_in_input'] = 2*speed_hw
        result['number_cmd_in_input'] = 2*cmd_hw
        savemat(result_file_name, result)
        print('\n\tSave Intermediate Result Successfully')
        print('\t%d-th learning : #Data=%d/%d, #rfs=%d/%d, error=%f\n' % (train_cnt, LWPR_model_left.n_data, LWPR_model_right.n_data, LWPR_model_left.num_rfs[0], LWPR_model_right.num_rfs[0], error_hist[train_cnt][1]))

    print('end training')
    return best_model_epoch
def main():
    """Fit an LWPR dynamics model on gathered data, then alternate between
    policy fitting and environment roll-outs for 1000 epochs.

    The model maps (state, action) to (state difference, reward). After an
    initial pass over ``--no_data_start`` gathered transitions, every epoch
    fits the agent against the model and then runs one episode, updating the
    model with each observed transition and printing the episode's total
    reward.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--environment", type=str, default='AntBulletEnv-v0')
    parser.add_argument("--no_data_start", type=int, default=10000)
    parser.add_argument("--train_policy_batch_size", type=int, default=30)
    parser.add_argument("--cma_maxiter", type=int, default=1000)
    parser.add_argument("--unroll_steps", type=int, default=200)
    args = parser.parse_args()

    print(args)

    env = gym.make(args.environment)
    obs_dim = env.observation_space.shape[0]
    act_dim = env.action_space.shape[0]

    input_dim = obs_dim + act_dim
    output_dim = obs_dim + 1  # state difference plus the reward

    model = LWPR(input_dim, output_dim)
    model.init_D = 1. * np.eye(input_dim)
    model.update_D = True
    model.init_alpha = 20. * np.eye(input_dim)
    model.meta = True

    agent = AGENT(obs_dim,
                  act_dim,
                  action_space_low=env.action_space.low,
                  action_space_high=env.action_space.high,
                  unroll_steps=args.unroll_steps)

    init_states = np.stack([env.reset() for _ in range(args.train_policy_batch_size)], axis=0)

    # Train the dynamics model on the initial data.
    data_buffer = gather_data3(env, args.no_data_start)
    states, actions, rewards, next_states, _ = zip(*data_buffer)
    states = np.stack(states, axis=0)
    actions = np.stack(actions, axis=0)
    rewards = np.array(rewards)[..., np.newaxis]
    next_states = np.stack(next_states, axis=0)

    # NOTE(review): the lwpr bindings appear to require float64 input
    # ("Expected a double precision numpy array") - confirm. The casts below
    # are no-ops when the data is float64 already.
    state_actions = np.asarray(np.concatenate([states, actions], axis=-1), dtype=np.float64)
    state_diff = next_states - states
    targets = np.asarray(np.concatenate([state_diff, rewards], axis=-1), dtype=np.float64)

    assert len(state_actions) == len(targets)
    for idx in np.random.permutation(len(state_actions)):
        model.update(state_actions[idx], targets[idx])

    for epoch in range(1000):
        agent._fit(model, init_states, args.cma_maxiter)

        total_rewards = 0.
        state = env.reset()
        while True:
            action = agent._forward(agent.thetas, state[np.newaxis, ...])[0]
            next_state, reward, done, _ = env.step(action)

            state_action = np.asarray(np.concatenate([state, action]), dtype=np.float64)
            state_diff = next_state - state
            target = np.asarray(np.append(state_diff, reward), dtype=np.float64)
            model.update(state_action, target)

            total_rewards += float(reward)
            state = next_state.copy()
            if done:
                print('epoch: %s total_rewards: %s' % (epoch, total_rewards))
                break
# mark the two context points with small unfilled blue circles
context1 = [2.0, context[0]]
context2 = [3.6, context[1]]
circle1 = plt.Circle(tuple(context1), 0.1, color='b', fill=False)
circle2 = plt.Circle(tuple(context2), 0.1, color='b', fill=False)

ax = plt.gca()
for marker in (circle1, circle2):
    ax.add_artist(marker)

plt.grid()
plt.savefig('/home/fmeccanici/Documents/thesis/thesis_workspace/src/promp_demo_2d/figures/lwpr/lwpr_demos.png')
plt.clf()

# initialize lwpr model
model = LWPR(n_in, n_out)
model.init_D = 10 * eye(n_in)
model.init_alpha = 0.1 * eye(n_in)
# model.kernel = 'BiSquare'

# present every demonstration to the model, ten passes in total;
# element 0 of a demonstration is the output, element 1 the context
for i in range(10):
    for demonstration in demonstrations:
        output, context = np.asarray(demonstration[0]), np.asarray(demonstration[1])
        model.update(context, output)

# generalize
# Copied from http://www.rueckstiess.net/research/snippets/show/9bd4b418
from numpy import *
from matplotlib import pyplot as plt
from lwpr import LWPR


def testfunc(x):
    """Evaluate the 1-D benchmark curve that the model is trained on."""
    numerator = 10 * sin(7.8 * log(1 + x))
    return numerator / (1 + 0.1 * x**2)


# training set: Ntr inputs drawn from [0, 10); targets are the curve shifted
# by 5 plus noise scaled by the input (``random`` is numpy's random module
# here, brought in by the star import)
Ntr = 500
Xtr = 10 * random.random((Ntr, 1))
Ytr = (5 + testfunc(Xtr)
       + 0.1 * random.normal(0, 1, (Ntr, 1)) * Xtr)

# initialize the LWPR model
model = LWPR(1, 1)
model.init_D = 20 * eye(1)
model.update_D = True
model.init_alpha = 40 * eye(1)
model.meta = False
model.penalty = 1e-4
model.diag_only = True

# train the model: 20 passes, each over a new random ordering of the data,
# accumulating the squared error of the prediction returned by update()
for k in range(20):
    ind = random.permutation(Ntr)
    mse = 0

    for i in range(Ntr):
        sample = ind[i]
        yp = model.update(Xtr[sample], Ytr[sample])
        mse = mse + (Ytr[sample, :] - yp)**2