exp_data.push(rollout(env, randpol, max_steps=max_timestep))

log.infov('-----------------DeepPILCO Iteration # {}-----------------'.format(i + 1))

# Train dynamics
train_dynamics_model_pilco(dynamics, dynamics_optimizer, exp_data,
                           epochs=num_itr_dyn, batch_size=dyn_batch_size,
                           plot_train=None, pre_process=pre_process)
# dynamics.update_dataset_statistics(exp_data)

# Save model
save_dir = log_dir
utils.save_net_param(dynamics, save_dir, name='dyn_model0', mode='net')

# exp_logger = utils.Logger(log_dir, csvname='exp')
# data = np.concatenate((exp_data.buffer[0], exp_data.buffer[1], exp_data.buffer[2],
#                        exp_data.buffer[3], exp_data.buffer[4]), axis=0)
# exp_logger.log_table2csv(data)

for itr in range(n_iter_algo):
    reward_sums = []
    for n_mpc in range(N_MPC):
        data_MPC, reward_sum = MPC_rollout(env, mpc_controller, dynamics,
                                           horizon=max_timestep, render=False,
                                           use_prob=USE_PROB_PREDICT)
        exp_data.push(data_MPC)
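# A minimal sketch of what one planning step inside MPC_rollout could look like,
# assuming a random-shooting controller: sample candidate action sequences,
# unroll each through the learned dynamics model, score them with a known cost
# function, and execute only the first action of the best sequence. The names
# cost_fn, act_dim, plan_horizon and n_candidates are illustrative assumptions,
# not part of the original code.
import torch


def mpc_plan(dynamics, state, cost_fn, act_dim, plan_horizon=15, n_candidates=500):
    """Return the first action of the lowest-cost sampled action sequence."""
    with torch.no_grad():
        # Candidate action sequences: (n_candidates, plan_horizon, act_dim), in [-1, 1]
        actions = torch.rand(n_candidates, plan_horizon, act_dim) * 2.0 - 1.0
        # Broadcast the current state to every candidate
        states = state.unsqueeze(0).repeat(n_candidates, 1)
        total_cost = torch.zeros(n_candidates)
        for t in range(plan_horizon):
            # One-step prediction with the learned model: s_{t+1} = f(s_t, a_t)
            states = dynamics(torch.cat([states, actions[:, t]], dim=-1))
            # cost_fn is assumed to return one cost per candidate, shape (n_candidates,)
            total_cost += cost_fn(states, actions[:, t])
        best = torch.argmin(total_cost)
        return actions[best, 0]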
log_str = '[Itr #{}/{} policy optim # {}/{} ]: loss mean: {:.5f}, grad norm: {:.3f}'
log.info(log_str.format((i + 1), args.num_iter_algo, (j + 1), args.num_iter_policy,
                        loss_mean, grad_norm))

cost_mean, cost_std = test_episodic_cost2(env, policy, dynamics, N=5, T=T, render=True)
log.info('Policy Test : # {} cost mean {:.5f} cost std {:.5f}'.format((i + 1), cost_mean, cost_std))

# Execute system and record data
for num in range(10):
    exp_data.push(rollout(env, policy, max_steps=T))

# Save model
save_dir = log_dir
utils.save_net_param(policy, save_dir, name='policy_' + str(i))
utils.save_net_param(dynamics, save_dir, name='dynamics_' + str(i))

# Record data
# list_ep_costs.append(torch.cat(list_costs).mean().data.cpu().numpy()[0])
# np.savetxt(log_dir + '/ep_costs', list_ep_costs)
# list_test_rewards.append(test_episodic_cost(env, policy, N=50, T=T, render=False))
# np.savetxt(log_dir + '/test_rewards', list_test_rewards)
# list_policy_param.append(next(policy.parameters()).data.cpu().numpy()[0])
# np.savetxt(log_dir + '/policy_param', list_policy_param)
# list_policy_grad.append(next(policy.parameters()).grad.data.cpu().numpy()[0])
# np.savetxt(log_dir + '/policy_grad', list_policy_grad)

logger.log({'itr': i,
            'policy_loss': torch.cat(list_costs).mean().data.cpu().numpy()[0],
            'cost_mean': cost_mean})
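# A minimal sketch of the kind of policy-optimization step whose loss_mean and
# grad_norm are logged above: unroll the current policy through the learned
# dynamics model for T steps, accumulate a differentiable cost, and take a
# gradient step on the policy parameters only. cost_fn and init_state are
# illustrative assumptions; the actual script may propagate particle
# distributions (as in DeepPILCO) rather than a single deterministic trajectory.
import torch


def policy_optim_step(policy, dynamics, policy_optimizer, init_state, cost_fn, T=25):
    """One policy update through the learned model; returns (loss, grad norm)."""
    state = init_state
    total_cost = torch.zeros(1)
    for t in range(T):
        action = policy(state)
        # Gradients flow through the dynamics model back into the policy
        state = dynamics(torch.cat([state, action], dim=-1))
        total_cost = total_cost + cost_fn(state, action)

    policy_optimizer.zero_grad()
    total_cost.backward()
    grad_norm = torch.nn.utils.clip_grad_norm_(policy.parameters(), max_norm=10.0)
    policy_optimizer.step()
    return total_cost.item(), grad_norm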
exp_data.push(rollout(env, randpol, max_steps=T, render=False))

log.infov('-----------------DeepPILCO Iteration # {}-----------------'.format(i + 1))

# Train dynamics
train_dynamics_model_pilco(dynamics, dynamics_optimizer, exp_data,
                           epochs=num_itr_dyn, batch_size=dyn_batch_size,
                           plot_train=None, pre_process=pre_process,
                           logger=logger)  # plot_train_ion

# Save model
save_dir = log_dir
utils.save_net_param(dynamics, save_dir, name='dyn_model', mode='net')

# save_dir = log_dir
# (_, _), (x_test, y_test) = load_data()
# plot_train(x_test, y_test, dyn_model=dynamics, pre_process=pre_process, save=False,
#            save_dir=save_dir + '/dyn_fig0.jpg', LengthOfCurve=LengthOfCurve)
# (_, _), (x_test, y_test) = load_data(dir_name='/home/drl/PycharmProjects/DeployedProjects/deepPILCO/MB/data/log-test1.csv',
#                                      data_num=1000)
# plot_train(x_test, y_test, dyn_model=dynamics, pre_process=pre_process, save=True,
#            save_dir=save_dir + '/dyn_fig_expect.jpg', LengthOfCurve=LengthOfCurve)
# plot_train_std(x_test, y_test, dyn_model=dynamics, pre_process=pre_process, save=True,
#                save_dir=save_dir + '/dyn_fig_std.jpg', LengthOfCurve=LengthOfCurve)
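# A minimal sketch of the kind of supervised update that train_dynamics_model_pilco
# presumably performs on mini-batches drawn from exp_data: regress the dynamics
# network from (state, action) onto the observed next state. The tensor names
# states, actions and next_states are assumptions about the batch layout, not the
# original implementation; many variants instead predict the state delta
# next_states - states, which is often better conditioned.
import torch
import torch.nn.functional as F


def train_dynamics_step(dynamics, dynamics_optimizer, states, actions, next_states):
    """One mini-batch regression step on transition data; returns the MSE loss."""
    inputs = torch.cat([states, actions], dim=-1)
    pred_next = dynamics(inputs)
    loss = F.mse_loss(pred_next, next_states)

    dynamics_optimizer.zero_grad()
    loss.backward()
    dynamics_optimizer.step()
    return loss.item()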