示例#1
0
    exp_data.push(rollout(env, randpol, max_steps=max_timestep))

# Announce which DeepPILCO iteration is starting (1-based index derived from i).
iteration_banner = '-----------------DeepPILCO Iteration # {}-----------------'
log.infov(iteration_banner.format(i + 1))

# Fit the dynamics model on everything currently stored in exp_data.
dynamics_fit_options = dict(
    epochs=num_itr_dyn,
    batch_size=dyn_batch_size,
    plot_train=None,
    pre_process=pre_process,
)
train_dynamics_model_pilco(dynamics, dynamics_optimizer, exp_data,
                           **dynamics_fit_options)
# dynamics.update_dataset_statistics(exp_data)

# Persist the dynamics network in the log directory.
save_dir = log_dir
utils.save_net_param(dynamics, save_dir, name='dyn_model0', mode='net')

# exp_logger = utils.Logger(log_dir, csvname='exp'  )
# data = np.concatenate((exp_data.buffer[0], exp_data.buffer[1],exp_data.buffer[2],exp_data.buffer[3],exp_data.buffer[4]), axis=0)
# exp_logger.log_table2csv(data)

# Outer loop: one pass per algorithm iteration (n_iter_algo passes in total).
for itr in range(n_iter_algo):
    # NOTE(review): nothing is appended to reward_sums in the visible lines —
    # confirm it is filled further down in the full file.
    reward_sums = []
    # Run N_MPC rollouts with the MPC controller and the current dynamics model.
    for n_mpc in range(N_MPC):
        # The call's result is unpacked into two values; only data_MPC is used
        # in the visible lines (reward_sum is presumably meant for reward_sums
        # — TODO confirm).
        data_MPC, reward_sum = MPC_rollout(env,
                                           mpc_controller,
                                           dynamics,
                                           horizon=max_timestep,
                                           render=False,
                                           use_prob=USE_PROB_PREDICT)
        # Add the freshly collected rollout to the experience data set.
        exp_data.push(data_MPC)
            # NOTE(review): these lines are indented one level deeper than the
            # statement before them and rely on i, j, args.*, loss_mean and
            # grad_norm, none of which are defined in the visible lines.
            # Presumably they belong to an inner policy-optimisation loop whose
            # header is missing here — confirm against the full file.
            # Template for the per-step progress line: iteration counter,
            # policy-optimisation counter, mean loss and gradient norm.
            log_str ='[Itr #{}/{} policy optim # {}/{} ]: loss mean: {:.5f},   grad norm:{:.3f}'

            log.info(log_str.format( (i+1),args.num_iter_algo,
                                  (j+1),args.num_iter_policy,
                                  loss_mean,   grad_norm ))

    # Evaluate the current policy: test_episodic_cost2 is called with N=5, T=T
    # and rendering enabled; its result is unpacked into a mean and a std cost.
    # NOTE(review): the index used below is i, while the visible enclosing loop
    # binds itr — confirm which variable is intended.
    cost_mean ,cost_std = test_episodic_cost2(env, policy,dynamics, N=5, T=T, render=True)
    log.info('Policy Test : # {}  cost mean {:.5f}  cost std {:.5f} '.format((i+1) ,cost_mean,cost_std ))

    # Execute system and record data: push 10 new policy rollouts
    # (max_steps=T each) into exp_data. The loop variable num is not used.
    for num in range(10):
        exp_data.push(rollout(env, policy, max_steps=T))
    
    # Save model: write policy and dynamics parameters to log_dir, with the
    # index i appended to each file name.
    save_dir = log_dir
    utils.save_net_param(policy, save_dir, name='policy_'+str(i))
    utils.save_net_param(dynamics, save_dir, name='dynamics_' + str(i))

    # Record data
    # list_ep_costs.append(torch.cat(list_costs).mean().data.cpu().numpy()[0])
    # np.savetxt(log_dir + '/ep_costs', list_ep_costs)
    # list_test_rewards.append(test_episodic_cost(env, policy, N=50, T=T, render=False))
    # np.savetxt(log_dir + '/test_rewards', list_test_rewards)
    # list_policy_param.append(next(policy.parameters()).data.cpu().numpy()[0])
    # np.savetxt(log_dir + '/policy_param', list_policy_param)
    # list_policy_grad.append(next(policy.parameters()).grad.data.cpu().numpy()[0])
    # np.savetxt(log_dir + '/policy_grad', list_policy_grad)

    logger.log({'itr': i,
                'policy_loss': torch.cat(list_costs).mean().data.cpu().numpy()[0],
                'cost_mean': cost_mean,
示例#3
0
    exp_data.push(rollout(env, randpol, max_steps=T, render=False))

# NOTE(review): the '{}' placeholder in this banner is never filled (there is
# no .format(...) call), so the message is logged with a literal '{}'. The
# iteration number to insert is not visible here — confirm and add it.
log.infov('-----------------DeepPILCO Iteration # {}-----------------')

# Fit the dynamics model on the data held in exp_data; training progress is
# reported through the supplied logger.
dynamics_training_kwargs = {
    'epochs': num_itr_dyn,
    'batch_size': dyn_batch_size,
    'plot_train': None,  # plot_train_ion
    'pre_process': pre_process,
    'logger': logger,
}
train_dynamics_model_pilco(dynamics, dynamics_optimizer, exp_data,
                           **dynamics_training_kwargs)

# Write the dynamics network to the log directory.
save_dir = log_dir
utils.save_net_param(dynamics, save_dir, name='dyn_model', mode='net')

#
# save_dir = log_dir
# (_, _), (x_test, y_test) = load_data()
# plot_train(x_test, y_test, dyn_model=dynamics, pre_process=pre_process, save=False,
#            save_dir=save_dir + '/dyn_fig0.jpg', LengthOfCurve=LengthOfCurve)
# (_, _), (x_test, y_test) = load_data(dir_name = '/home/drl/PycharmProjects/DeployedProjects/deepPILCO/MB/data/log-test1.csv',data_num =1000)
# plot_train(x_test, y_test, dyn_model=dynamics, pre_process=pre_process, save=True,
#            save_dir=save_dir + '/dyn_fig_expect.jpg', LengthOfCurve=LengthOfCurve)
#
# plot_train_std(x_test, y_test, dyn_model=dynamics, pre_process=pre_process, save=True,
#            save_dir=save_dir + '/dyn_fig_std.jpg', LengthOfCurve=LengthOfCurve)