    def __init__(self, params):

        #######################
        ## AGENT PARAMS
        #######################

        agent_params = {
            'n_layers': params['n_layers'],
            'size': params['size'],
            'learning_rate': params['learning_rate'],
            'max_replay_buffer_size': params['max_replay_buffer_size'],
        }

        self.params = params
        self.params['agent_class'] = BCAgent  ## HW1: you will modify this
        self.params['agent_params'] = agent_params

        ################
        ## RL TRAINER
        ################

        self.rl_trainer = RL_Trainer(self.params)  ## HW1: you will modify this

        #######################
        ## LOAD EXPERT POLICY
        #######################

        print('Loading expert policy from...',
              self.params['expert_policy_file'])
        self.loaded_expert_policy = LoadedGaussianPolicy(
            self.params['expert_policy_file'])
        print('Done restoring expert policy...')
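
For context, a trainer built around an __init__ like this is usually driven by a short run method that forwards the loaded expert policy to the inner RL_Trainer. The sketch below is illustrative only; the keyword names (initial_expertdata, relabel_with_expert, and so on) are assumptions about the RL_Trainer interface rather than something shown in this snippet.

    def run_training_loop(self):
        # Hand the loaded expert policy to the inner RL_Trainer.
        # All keyword names here are assumed, not taken from the snippet above.
        self.rl_trainer.run_training_loop(
            n_iter=self.params['n_iter'],
            initial_expertdata=self.params['expert_data'],
            collect_policy=self.rl_trainer.agent.actor,
            eval_policy=self.rl_trainer.agent.actor,
            relabel_with_expert=self.params['do_dagger'],
            expert_policy=self.loaded_expert_policy,
        )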
Example #2
    def __init__(self, params):

        #######################
        ## AGENT PARAMS
        #######################

        agent_params = {
            "n_layers": params["n_layers"],
            "size": params["size"],
            "learning_rate": params["learning_rate"],
            "max_replay_buffer_size": params["max_replay_buffer_size"],
        }

        self.params = params
        self.params["agent_class"] = BCAgent  ## HW1: you will modify this
        self.params["agent_params"] = agent_params

        ################
        ## RL TRAINER
        ################

        self.rl_trainer = RL_Trainer(self.params)  ## HW1: you will modify this

        #######################
        ## LOAD EXPERT POLICY
        #######################

        print("Loading expert policy from...", self.params["expert_policy_file"])
        self.loaded_expert_policy = LoadedGaussianPolicy(
            self.params["expert_policy_file"]
        )
        print("Done restoring expert policy...")
Example #3
    def __init__(self, params):

        #######################
        ## AGENT PARAMS
        #######################

        agent_params = {
            'n_layers': params['n_layers'],
            'size': params['size'],
            'learning_rate': params['learning_rate'],
            'max_replay_buffer_size': params['max_replay_buffer_size'],
            'siren': params['siren'],
            'train_separate_params': params['train_separate_params'],
            'supervision_mode': params['supervision_mode'],
            'offset_learning_rate': params['offset_learning_rate'],
            'epsilon_s': params['epsilon_s'],
            'auto_cast': params['auto_cast'],
            'gradient_loss_scale': params['gradient_loss_scale'],
            'additional_activation': params['additional_activation'],
            'omega': params['omega'],
        }

        self.params = params
        self.params['agent_class'] = BCAgent  ## HW1: you will modify this
        self.params['agent_params'] = agent_params

        ################
        ## RL TRAINER
        ################

        self.rl_trainer = RL_Trainer(self.params)  ## HW1: you will modify this

        #######################
        ## LOAD EXPERT POLICY
        #######################

        print('Loading expert policy from...',
              self.params['expert_policy_file'])
        self.loaded_expert_policy = LoadedGaussianPolicy(
            self.params['expert_policy_file'])
        print('Done restoring expert policy...')
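
To illustrate how a parameter dict like the one above is typically consumed downstream, here is a minimal, hypothetical sketch of an agent constructor reading the network and buffer settings. The class name and attributes are illustrative assumptions, not the homework's actual BCAgent.

class SketchAgent:
    """Hypothetical agent: shows how agent_params might be read; not the real BCAgent."""

    def __init__(self, env, agent_params):
        self.agent_params = agent_params
        # Core network hyperparameters from the dict built above.
        self.n_layers = agent_params['n_layers']
        self.size = agent_params['size']
        self.learning_rate = agent_params['learning_rate']
        # SIREN-style options fall back to defaults when absent.
        self.use_siren = agent_params.get('siren', False)
        self.omega = agent_params.get('omega', 30.0)
        self.max_replay_buffer_size = agent_params['max_replay_buffer_size']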
Example #4
def main():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--emp_epoch', type=int, default=6)  # number of EMP training epochs (per iter in n_iter)
    parser.add_argument('--eta', type=int, default=40)
    parser.add_argument('--bins', type=int, default=20)
    parser.add_argument('--load_dict', action='store_true')  # boolean flag; argparse's type=bool would treat any non-empty string as True
    parser.add_argument('--load_path', type=str)
    parser.add_argument('--batch_size', type=int, default=100)
    parser.add_argument('--vertices', type=int, default=6)
    parser.add_argument('--obs_space', type=int, default=17)

    parser.add_argument('--expert_policy_file', '-epf', type=str, required=True)  # relative to where you're running this script from
    parser.add_argument('--expert_data', '-ed', type=str, required=True)  # relative to where you're running this script from
    parser.add_argument('--env_name', '-env', type=str, help='choices: Ant-v2, Humanoid-v2, Walker2d-v2, HalfCheetah-v2, Hopper-v2', required=True)
    parser.add_argument('--exp_name', '-exp', type=str, required=True)  # pick an experiment name
    parser.add_argument('--do_dagger', action='store_true')
    parser.add_argument('--ep_len', type=int)

    parser.add_argument('--num_agent_train_steps_per_iter', type=int, default=1000)  # number of gradient steps for training policy (per iter in n_iter)
    parser.add_argument('--n_iter', '-n', type=int, default=1)

    parser.add_argument('--eval_batch_size', type=int,
                        default=1000)  # eval data collected (in the env) for logging metrics
    parser.add_argument('--train_batch_size', type=int,
                        default=100)  # number of sampled data points to be used per gradient/train step

    parser.add_argument('--n_layers', type=int, default=2)  # depth of the policy network to be learned
    parser.add_argument('--size', type=int, default=64)  # width of each layer of the policy network
    parser.add_argument('--learning_rate', '-lr', type=float, default=5e-3)  # LR for supervised learning

    parser.add_argument('--video_log_freq', type=int, default=5)
    parser.add_argument('--scalar_log_freq', type=int, default=1)
    parser.add_argument('--no_gpu', '-ngpu', action='store_true')
    parser.add_argument('--which_gpu', type=int, default=0)
    parser.add_argument('--max_replay_buffer_size', type=int, default=1000000)
    parser.add_argument('--save_params', action='store_true')
    parser.add_argument('--seed', type=int, default=2)
    args = parser.parse_args()

    # convert args to dictionary
    params = vars(args)

    #############
    ## EXPERT POLICY
    #############

    print('Loading expert policy from...', params['expert_policy_file'])
    expert_policy = LoadedGaussianPolicy(params['expert_policy_file'])
    print('Done restoring expert policy...')


    ###################
    ### INIT PATH
    ###################
    params['filestem'] = 'cs285/data/emp_dagger_buckets_' + str(params['env_name']) + '_' + str(params['bins']) + '_epoch'\
                         + str(params['emp_epoch']) + '_eta' + str(int(params['eta']))

    ###################
    ### RUN TRAINING
    ###################

    trainer = EMP_Trainer(params)
    trainer.run_training_loop(params['n_iter'], expert_policy)
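
As written, main() is never invoked when the file is run directly; the standard entry-point guard below (not part of the snippet) shows how it would typically be wired up, with a purely illustrative command line in the comment.

if __name__ == '__main__':
    # Illustrative invocation; file name, paths and experiment name are examples only:
    #   python run_emp.py --expert_policy_file cs285/policies/experts/Hopper.pkl \
    #       --expert_data cs285/expert_data/expert_data_Hopper-v2.pkl \
    #       --env_name Hopper-v2 --exp_name emp_hopper_test --do_dagger
    main()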
Example #5
        obs, rew, done, info = unwrapped_env.orig_step_(action_cont)

        return (obs, rew, done, info)

    # change action space
    env.action_space = spaces.MultiDiscrete([[0, K - 1]
                                             for _ in range(naction)])

    unwrapped_env.step = discretizing_step
    unwrapped_env.reset = discretizing_reset

    return env
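
# The snippet above begins mid-function, so the code that turns a discrete
# bin index into action_cont is cut off. The helper below is an illustrative
# sketch of that mapping, assuming K evenly spaced bins per action dimension;
# the name and structure are not taken from the original wrapper.
def undiscretize_action(action_disc, low, high, K):
    """Map per-dimension bin indices in [0, K-1] back to continuous values."""
    import numpy as np  # local import keeps the sketch self-contained
    frac = np.asarray(action_disc, dtype=np.float64) / (K - 1)
    return low + (high - low) * frac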


# Load the expert policy and make the matching gym environment
expert_policy = LoadedGaussianPolicy('cs285/policies/experts/Walker2d.pkl')

env = gym.make("Walker2d-v2")  # the env must match the loaded expert policy
obs = env.reset()
print("obs shape", obs.shape)
print("low", env.action_space.low)
print("high", env.action_space.high)
count = 0
for _ in range(10000):
    action = expert_policy.get_action(obs)
    print("action", action)
    obs, reward, done, info = env.step(action)  # step with the expert action
    # print("reward", reward)
    count += 1
    if done:
        print("Done after", count, "steps")
        break
env.close()