Example #1
def study_cem(params, starting_pol=None) -> None:
    """
    Start a study of CEM algorithms
    :param params: the parameters of the study
    :param starting_pol: initial policy
    :return: nothing
    """
    assert params.policy_type in ['squashedGaussian', 'normal',
                                  'beta'], 'unsupported policy type'
    chrono = Chrono()
    # cuda = torch.device('cuda')
    study = params.gradients
    if params.nb_trajs_cem is not None:
        params.nb_trajs = params.nb_trajs_cem
    simu = make_simu_from_params(params)
    for i in range(1):  # not len(study): only the first gradient variant is run
        simu.env.set_file_name('cem' + study[i] + '_' + simu.env_name)
        print("study : ", study[i])
        for j in range(params.nb_repet):
            simu.env.reinit()
            if params.policy_type == "squashedGaussian":
                policy = SquashedGaussianPolicy(simu.obs_size, 32, 64, 1)
            elif params.policy_type == "normal":
                policy = NormalPolicy(simu.obs_size, 32, 64, 1)
            elif params.policy_type == "beta":
                policy = BetaPolicy(simu.obs_size, 32, 64, 1)
            if starting_pol is not None:
                policy.set_weights(starting_pol[j])
            pw = PolicyWrapper(policy, j, params.policy_type, simu.env_name,
                               params.team_name, params.max_episode_steps)
            # plot_policy(policy, simu.env, True, simu.env_name, study[i], '_ante_', j, plot=False)
            simu.train_cem(pw, params, policy)
            # plot_policy(policy, simu.env, True, simu.env_name, study[i], '_post_', j, plot=False)
    chrono.stop()
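
To make the expected inputs concrete, here is a hypothetical driver for this study_cem variant. Every field on the params stand-in below is an assumption inferred from the attribute accesses in the body; the real project presumably builds params from an argument parser, and make_simu_from_params likely reads further fields such as the environment name.

from types import SimpleNamespace

params = SimpleNamespace(
    env_name='Pendulum-v0',    # assumed to be read by make_simu_from_params
    policy_type='normal',      # one of: squashedGaussian, normal, beta
    gradients=['sum'],         # only the first element is used above
    nb_trajs_cem=20,           # overrides nb_trajs when not None
    nb_trajs=None,
    nb_repet=3,                # number of independent repetitions
    team_name='demo_team',
    max_episode_steps=200,
)
study_cem(params)              # or study_cem(params, starting_pol=weights)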
Example #2
def study_cem(params):
    """
    Start a CEM study
    :param params: the parameters of the study
    :return: the weights, rewards, populations, population scores and
             kept flags collected by the last repetition
    """

    assert params.policy_type in ['normal'], 'unsupported policy type'
    # cuda = torch.device('cuda')
    study = params.gradients
    simu = make_simu_from_params(params)
    simu.env.set_file_name(study[0] + '_' + simu.env_name)
    reward_file = None
    print("study : ", study)

    # do not fix any layers
    params.fix_layers = False

    print("cem study")
    chrono_cem = Chrono()
    for j in range(params.nb_repet):
        simu.env.reinit()
        if params.policy_type == "normal":
            policy = NormalPolicy(simu.obs_size, 24, 36, 1)
        pw = PolicyWrapper(policy, params.policy_type, simu.env_name, j,
                           params.team_name, params.max_episode_steps)
        all_weights, all_rewards, all_pops, all_pops_scores, is_kept = simu.train(
            pw, params, policy, False, reward_file, "", study[0], 0, True)
    cem_time = chrono_cem.stop()
    # only the results of the last repetition are returned
    return all_weights, all_rewards, all_pops, all_pops_scores, is_kept
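
The CEM training itself happens inside simu.train, which is not shown here. For orientation, a cross-entropy method over policy weights conventionally reduces to the NumPy sketch below; this is an illustrative stand-in, not the project's implementation.

import numpy as np

def cem_sketch(score_fn, dim, pop_size=50, elite_frac=0.2, iters=100):
    """Illustrative CEM loop: sample a population of weight vectors,
    keep the best-scoring fraction, refit the sampling Gaussian on it."""
    mean, std = np.zeros(dim), np.ones(dim)
    n_elite = max(1, int(pop_size * elite_frac))
    for _ in range(iters):
        pop = mean + std * np.random.randn(pop_size, dim)  # sample candidates
        scores = np.array([score_fn(w) for w in pop])      # evaluate each one
        elite = pop[np.argsort(scores)[-n_elite:]]         # top elite_frac
        mean = elite.mean(axis=0)                          # refit the Gaussian
        std = elite.std(axis=0) + 1e-3                     # noise floor
    return mean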
Example #3

def study_regress(params) -> None:
    """
    Start a regression study: warm-start each policy by regression,
    then train it
    :param params: the parameters of the study
    :return: nothing
    """
    assert params.policy_type in ['bernoulli', 'normal',
                                  'squashedGaussian'], 'unsupported policy type'
    chrono = Chrono()
    study = params.gradients
    simu = make_simu_from_params(params)
    for i in range(len(study)):
        simu.env.set_file_name(study[i] + '_' + simu.env_name)
        policy_loss_file, critic_loss_file = set_files(study[i], simu.env_name)
        print("study : ", study[i])
        for j in range(params.nb_repet):
            simu.env.reinit()
            if params.policy_type == "bernoulli":
                policy = BernoulliPolicy(simu.obs_size, 24, 36, 1,
                                         params.lr_actor)
            elif params.policy_type == "normal":
                policy = NormalPolicy(simu.obs_size, 24, 36, 1,
                                      params.lr_actor)
            elif params.policy_type == "squashedGaussian":
                policy = SquashedGaussianPolicy(simu.obs_size, 24, 36, 1,
                                                params.lr_actor)
            pw = PolicyWrapper(policy, params.policy_type, simu.env_name,
                               params.team_name, params.max_episode_steps)
            plot_policy(policy,
                        simu.env,
                        True,
                        simu.env_name,
                        study[i],
                        '_ante_',
                        j,
                        plot=False)

            if not simu.discrete:
                act_size = simu.env.action_space.shape[0]
                critic = QNetworkContinuous(simu.obs_size + act_size, 24, 36,
                                            1, params.lr_critic)
            else:
                critic = VNetwork(simu.obs_size, 24, 36, 1, params.lr_critic)
            # plot_critic(simu, critic, policy, study[i], '_ante_', j)

            # warm-start the policy by regression before training it
            regress(simu, policy, params.policy_type, 250, params.render)
            simu.train(pw, params, policy, critic, policy_loss_file,
                       critic_loss_file, study[i])
            plot_policy(policy,
                        simu.env,
                        True,
                        simu.env_name,
                        study[i],
                        '_post_',
                        j,
                        plot=False)
            plot_critic(simu, critic, policy, study[i], '_post_', j)
            critic.save_model('data/critics/' + params.env_name + '#' +
                              params.team_name + '#' + study[i] + str(j) +
                              '.pt')
    chrono.stop()
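
The regress helper called above is not shown. Under the assumption that it warm-starts the policy by supervised regression onto target actions, it might look like the hypothetical PyTorch sketch below (regress_sketch, states and target_actions are illustrative names, not the project's API).

import torch
import torch.nn.functional as F

def regress_sketch(policy, states, target_actions, epochs=250):
    # Hypothetical warm-start: fit the policy's output to target actions
    # by mean-squared-error regression. Assumes `policy` is an nn.Module
    # whose forward pass maps a batch of states to a batch of actions.
    optimizer = torch.optim.Adam(policy.parameters(), lr=1e-3)
    for _ in range(epochs):
        loss = F.mse_loss(policy(states), target_actions)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()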
Example #4
def get_same_starting_policies(params):
    """
    Build one set of initial policy weights per repetition so that several
    studies can start from identical policies
    :param params: the parameters of the study
    :return: a list of params.nb_repet weight sets
    """
    simu = make_simu_from_params(params)
    policies = []
    for i in range(params.nb_repet):
        if params.policy_type == 'normal':
            policies.append(
                NormalPolicy(simu.obs_size, 32, 64, 1,
                             params.lr_actor).get_weights())
        elif params.policy_type == 'squashedGaussian':
            policies.append(
                SquashedGaussianPolicy(simu.obs_size, 32, 64, 1,
                                       params.lr_actor).get_weights())
        elif params.policy_type == 'beta':
            policies.append(
                BetaPolicy(simu.obs_size, 32, 64, 1,
                           params.lr_actor).get_weights())
    return policies
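
This helper is presumably meant to be paired with the study_cem variant of Example #1, which accepts a starting_pol argument of exactly this shape:

starting_pol = get_same_starting_policies(params)  # one weight set per repetition
study_cem(params, starting_pol=starting_pol)       # Example #1's signature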
Example #5
def study_beta(params):
    """
    Sweep a grid of beta values and train a policy/critic pair for each
    :param params: the parameters of the study
    :return: nothing
    """
    simu = make_simu_from_params(params)
    for beta in [0.1, 0.5, 1.0, 5.0, 10.0]:
        print("beta:", beta)
        policy_loss_file, critic_loss_file = set_files(str(beta), simu.env_name)
        simu.env.set_file_name(str(beta) + '_' + simu.env_name)
        for i in range(params.nb_repet):
            simu.env.reinit()
            if params.policy_type == "bernoulli":
                policy = BernoulliPolicy(simu.obs_size, 24, 36, 1, params.lr_actor)
            elif params.policy_type == "normal":
                policy = NormalPolicy(simu.obs_size, 24, 36, 1, params.lr_actor)
            if not simu.discrete:
                act_size = simu.env.action_space.shape[0]
                critic = QNetworkContinuous(simu.obs_size + act_size, 24, 36, 1, params.lr_critic)
            else:
                critic = VNetwork(simu.obs_size, 24, 36, 1, params.lr_critic)
            pw = PolicyWrapper(policy, params.policy_type, simu.env_name, params.team_name, params.max_episode_steps)
            simu.train(pw, params, policy, critic, policy_loss_file, critic_loss_file, "beta", beta)
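
A hypothetical invocation of study_beta; as in the earlier sketch, the params fields are assumptions inferred from the attribute accesses in the body.

from types import SimpleNamespace

params = SimpleNamespace(
    env_name='Pendulum-v0', policy_type='normal', nb_repet=2,
    lr_actor=1e-3, lr_critic=1e-3, team_name='demo_team',
    max_episode_steps=200)
study_beta(params)  # sweeps beta over [0.1, 0.5, 1.0, 5.0, 10.0]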
def study_pg(params) -> None:
    """
    Start a study of the policy gradient algorithms
    :param params: the parameters of the study
    :return: nothing
    """
    #### MODIF : added discrete
    assert params.policy_type in [
        'bernoulli', 'normal', 'squashedGaussian', 'discrete'
    ], 'unsupported policy type'
    ####
    chrono = Chrono()
    # cuda = torch.device('cuda')
    study = params.gradients
    simu = make_simu_from_params(params)
    for i in range(len(study)):
        simu.env.set_file_name(study[i] + '_' + simu.env_name)
        policy_loss_file, critic_loss_file = set_files(study[i], simu.env_name)
        print("study : ", study[i])
        for j in range(params.nb_repet):
            simu.env.reinit()
            if params.policy_type == "bernoulli":
                policy = BernoulliPolicy(simu.obs_size, 100, 200, 1,
                                         params.lr_actor)
            #### MODIF : added the discrete policy
            elif params.policy_type == "discrete":
                if isinstance(simu.env.action_space, gym.spaces.box.Box):
                    print("Error: environment action space is not discrete: " +
                          str(simu.env.action_space))
                    # fall back to one action per integer step of the box range
                    nb_actions = int(simu.env.action_space.high[0] -
                                     simu.env.action_space.low[0] + 1)
                else:
                    nb_actions = simu.env.action_space.n
                policy = DiscretePolicy(simu.obs_size, 24, 36, nb_actions,
                                        params.lr_actor)
            ####
            elif params.policy_type == "normal":
                policy = NormalPolicy(simu.obs_size, 100, 200, 1,
                                      params.lr_actor)
            elif params.policy_type == "squashedGaussian":
                policy = SquashedGaussianPolicy(simu.obs_size, 100, 200, 1,
                                                params.lr_actor)
            elif params.policy_type == "DDPG":
                policy = DDPG(simu.obs_size, 24, 36, 1, params.lr_actor)
            # policy = policy.cuda()
            pw = PolicyWrapper(policy, params.policy_type, simu.env_name,
                               params.team_name, params.max_episode_steps)
            plot_policy(policy,
                        simu.env,
                        True,
                        simu.env_name,
                        study[i],
                        '_ante_',
                        j,
                        plot=False)

            if not simu.discrete:
                act_size = simu.env.action_space.shape[0]
                critic = QNetworkContinuous(simu.obs_size + act_size, 24, 36,
                                            1, params.lr_critic)
            else:
                critic = VNetwork(simu.obs_size, 24, 36, 1, params.lr_critic)
            # plot_critic(simu, critic, policy, study[i], '_ante_', j)

            simu.train(pw, params, policy, critic, policy_loss_file,
                       critic_loss_file, study[i])
            plot_policy(policy,
                        simu.env,
                        True,
                        simu.env_name,
                        study[i],
                        '_post_',
                        j,
                        plot=False)
            if False:  # histogram plotting disabled; set to True to enable
                if params.policy_type == "normal":
                    plot_normal_histograms(policy, j, simu.env_name)
                else:
                    plot_weight_histograms(policy, j, simu.env_name)
        # note: only the critic from the last repetition (j) is plotted and saved
        plot_critic(simu, critic, policy, study[i], '_post_', j)
        critic.save_model('data/critics/' + params.env_name + '#' +
                          params.team_name + '#' + study[i] + str(j) + '.pt')
    chrono.stop()
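
The gradient variants listed in params.gradients are applied inside simu.train, which is not shown. For orientation, the plain 'sum' variant is conventionally the REINFORCE estimator sketched below, where every log-probability in an episode is weighted by the full episode return; this is an illustrative sketch, not the project's update.

import torch

def reinforce_sum_step(policy_optimizer, log_probs, rewards):
    # One 'sum' policy-gradient update: loss = -(sum of log-probs) * return.
    # `log_probs` is a list of scalar tensors produced while acting.
    episode_return = sum(rewards)
    loss = -torch.stack(log_probs).sum() * episode_return
    policy_optimizer.zero_grad()
    loss.backward()
    policy_optimizer.step()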