def run(env_id, seed, evaluation, **kwargs):

    # Create envs.
    env = gym.make(env_id)

    logger.info("Env info")
    logger.info(env.__doc__)
    logger.info("-" * 20)
    gym.logger.setLevel(logging.WARN)

    if kwargs['skillset']:
        skillset_file = __import__("HER.skills.%s" % kwargs['skillset'],
                                   fromlist=[''])
        my_skill_set = SkillSet(skillset_file.skillset)
    else:
        my_skill_set = None

    set_global_seeds(seed)
    env.seed(seed)

    model_path = os.path.join(kwargs['restore_dir'], "model")
    testing.testing(env, model_path, my_skill_set, kwargs['render_eval'],
                    kwargs['commit_for'], kwargs['nb_eval_episodes'])

    env.close()
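
For reference, a minimal driver for this testing entry point, assuming an argparse front-end; the flag names are hypothetical and simply mirror the kwargs that run() reads above.

if __name__ == '__main__':
    import argparse

    # Hypothetical CLI wiring -- only the kwargs keys are taken from run().
    parser = argparse.ArgumentParser()
    parser.add_argument('--env-id', default='FetchReach-v1')
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--restore-dir', required=True)
    parser.add_argument('--skillset', default=None)
    parser.add_argument('--render-eval', action='store_true')
    parser.add_argument('--commit-for', type=int, default=1)
    parser.add_argument('--nb-eval-episodes', type=int, default=20)
    args = parser.parse_args()

    run(args.env_id, args.seed, evaluation=True,
        restore_dir=args.restore_dir, skillset=args.skillset,
        render_eval=args.render_eval, commit_for=args.commit_for,
        nb_eval_episodes=args.nb_eval_episodes)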
Example #2
def run(env_id, seed, noise_type, layer_norm, evaluation, **kwargs):
    # Configure things.
    rank = MPI.COMM_WORLD.Get_rank()
    if rank != 0:
        logger.set_level(logger.DISABLED)

    # Create envs.
    env = gym.make(env_id)
    logger.debug("Env info")
    logger.debug(env.__doc__)
    logger.debug("-" * 20)
    gym.logger.setLevel(logging.WARN)

    if evaluation and rank == 0:
        if kwargs['eval_env_id']:
            eval_env_id = kwargs['eval_env_id']
        else:
            eval_env_id = env_id
        eval_env = gym.make(eval_env_id)
        # consume eval_env_id so it is not forwarded again via **kwargs
        del kwargs['eval_env_id']
    else:
        eval_env = None

    # Parse noise_type
    action_noise = None
    param_noise = None
    nb_actions = env.action_space.shape[-1]
    for current_noise_type in noise_type.split(','):
        current_noise_type = current_noise_type.strip()
        if current_noise_type == 'none':
            pass
        elif 'adaptive-param' in current_noise_type:
            _, stddev = current_noise_type.split('_')
            param_noise = AdaptiveParamNoiseSpec(
                initial_stddev=float(stddev),
                desired_action_stddev=float(stddev))
        elif 'normal' in current_noise_type:
            _, stddev = current_noise_type.split('_')
            action_noise = NormalActionNoise(mu=np.zeros(nb_actions),
                                             sigma=float(stddev) *
                                             np.ones(nb_actions))
        elif 'ou' in current_noise_type:
            _, stddev = current_noise_type.split('_')
            action_noise = OrnsteinUhlenbeckActionNoise(
                mu=np.zeros(nb_actions),
                sigma=float(stddev) * np.ones(nb_actions))
        elif 'epsnorm' in current_noise_type:
            _, stddev, epsilon = current_noise_type.split('_')
            action_noise = EpsilonNormalActionNoise(mu=np.zeros(nb_actions),
                                                    sigma=float(stddev) *
                                                    np.ones(nb_actions),
                                                    epsilon=float(epsilon))
        else:
            raise RuntimeError(
                'unknown noise type "{}"'.format(current_noise_type))

    # Configure components.
    memory = Memory(limit=int(1e6),
                    action_shape=env.action_space.shape,
                    observation_shape=env.observation_space.shape)
    critic = Critic(layer_norm=layer_norm)
    actor = Actor(nb_actions, layer_norm=layer_norm)

    # Seed everything to make things reproducible.
    seed = seed + 1000000 * rank
    tf.reset_default_graph()

    # Import the configured skill set (used only with look-ahead).
    if kwargs['look_ahead'] and kwargs['skillset']:
        skillset_file = __import__("HER.skills.%s" % kwargs['skillset'],
                                   fromlist=[''])
        my_skill_set = SkillSet(skillset_file.skillset)
    else:
        my_skill_set = None

    set_global_seeds(seed)
    env.seed(seed)
    if eval_env is not None:
        eval_env.seed(seed)

    # Log run info and record the start time on rank 0.
    if rank == 0:
        logger.info('rank {}: seed={}, logdir={}'.format(
            rank, seed, logger.get_dir()))
        start_time = time.time()
    training.train(env=env,
                   eval_env=eval_env,
                   param_noise=param_noise,
                   action_noise=action_noise,
                   actor=actor,
                   critic=critic,
                   memory=memory,
                   my_skill_set=my_skill_set,
                   **kwargs)
    env.close()
    if eval_env is not None:
        eval_env.close()
    if rank == 0:
        logger.info('total runtime: {}s'.format(time.time() - start_time))
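
For reference, the comma-separated noise spec that the loop above parses; these sample strings are illustrative, inferred from the split('_') convention rather than taken from any config.

# Illustrative noise_type values for the parser above (assumed format:
# name, then underscore-separated floats; commas separate multiple specs):
#   'none'                      -> no extra noise
#   'adaptive-param_0.2'        -> AdaptiveParamNoiseSpec(0.2, 0.2)
#   'normal_0.1'                -> NormalActionNoise, sigma 0.1 per action dim
#   'ou_0.2'                    -> OrnsteinUhlenbeckActionNoise, sigma 0.2
#   'epsnorm_0.2_0.1'           -> EpsilonNormalActionNoise, sigma 0.2, eps 0.1
#   'adaptive-param_0.2,ou_0.1' -> parameter noise and action noise together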
Example #3
def run(env_id, seed, noise_type, layer_norm, evaluation, **kwargs):
    # Configure things.
    rank = MPI.COMM_WORLD.Get_rank()
    if rank != 0:
        logger.set_level(logger.DISABLED)

    # Create envs.
    env = gym.make(env_id)
    env = bench.Monitor(
        env,
        logger.get_dir() and os.path.join(logger.get_dir(), str(rank)))
    gym.logger.setLevel(logging.WARN)

    if evaluation and rank == 0:
        eval_env = gym.make(env_id)
        eval_env = bench.Monitor(eval_env,
                                 os.path.join(logger.get_dir(), 'gym_eval'))
    else:
        eval_env = None

    # Parse noise_type
    action_noise = None
    param_noise = None
    nb_actions = env.action_space.shape[-1]
    for current_noise_type in noise_type.split(','):
        current_noise_type = current_noise_type.strip()
        if current_noise_type == 'none':
            pass
        elif 'adaptive-param' in current_noise_type:
            _, stddev = current_noise_type.split('_')
            param_noise = AdaptiveParamNoiseSpec(
                initial_stddev=float(stddev),
                desired_action_stddev=float(stddev))
        elif 'normal' in current_noise_type:
            _, stddev = current_noise_type.split('_')
            action_noise = NormalActionNoise(mu=np.zeros(nb_actions),
                                             sigma=float(stddev) *
                                             np.ones(nb_actions))
        elif 'ou' in current_noise_type:
            _, stddev = current_noise_type.split('_')
            action_noise = OrnsteinUhlenbeckActionNoise(
                mu=np.zeros(nb_actions),
                sigma=float(stddev) * np.ones(nb_actions))
        else:
            raise RuntimeError(
                'unknown noise type "{}"'.format(current_noise_type))

    # Configure components.
    memory = Memory(limit=int(1e6),
                    action_shape=env.action_space.shape,
                    observation_shape=env.observation_space.shape)
    critic = Critic(layer_norm=layer_norm)
    actor = Actor(nb_actions, layer_norm=layer_norm)

    # Seed everything to make things reproducible.
    seed = seed + 1000000 * rank
    logger.info('rank {}: seed={}, logdir={}'.format(rank, seed,
                                                     logger.get_dir()))
    tf.reset_default_graph()
    set_global_seeds(seed)
    env.seed(seed)
    if eval_env is not None:
        eval_env.seed(seed)

    # Record the start time on rank 0.
    if rank == 0:
        start_time = time.time()
    training.train(env=env,
                   eval_env=eval_env,
                   param_noise=param_noise,
                   action_noise=action_noise,
                   actor=actor,
                   critic=critic,
                   memory=memory,
                   **kwargs)
    env.close()
    if eval_env is not None:
        eval_env.close()
    if rank == 0:
        logger.info('total runtime: {}s'.format(time.time() - start_time))
Example #4
def run(env_id, seed, noise_type, layer_norm, evaluation, **kwargs):
    # Configure things.
    rank = MPI.COMM_WORLD.Get_rank()
    if rank != 0:
        logger.set_level(logger.DISABLED)

    # Create envs.
    env = gym.make(env_id)

    logger.info("Env info")
    logger.info(env.__doc__)
    logger.info("-" * 20)
    gym.logger.setLevel(logging.WARN)

    if evaluation and rank == 0:
        if kwargs['eval_env_id']:
            eval_env_id = kwargs['eval_env_id']
        else:
            eval_env_id = env_id
        eval_env = gym.make(eval_env_id)
        # del eval_env_id from kwargs
        del kwargs['eval_env_id']
    else:
        eval_env = None

    # Parse noise_type
    action_noise = None
    param_noise = None

    tf.reset_default_graph()

    # With a skill set, the action is a discrete skill choice plus the
    # skill parameters, so the action dimension is derived from the set.
    if kwargs['skillset']:
        skillset_file = __import__("HER.skills.%s" % kwargs['skillset'],
                                   fromlist=[''])
        my_skill_set = SkillSet(skillset_file.skillset)
        nb_actions = my_skill_set.params + my_skill_set.len

    else:
        nb_actions = env.action_space.shape[-1]

    for current_noise_type in noise_type.split(','):
        current_noise_type = current_noise_type.strip()
        if current_noise_type == 'none':
            pass
        elif 'adaptive-param' in current_noise_type:
            _, stddev = current_noise_type.split('_')
            param_noise = AdaptiveParamNoiseSpec(
                initial_stddev=float(stddev),
                desired_action_stddev=float(stddev))
        elif 'normal' in current_noise_type:
            _, stddev = current_noise_type.split('_')
            action_noise = NormalActionNoise(mu=np.zeros(nb_actions),
                                             sigma=float(stddev) *
                                             np.ones(nb_actions))
        elif 'ou' in current_noise_type:
            _, stddev = current_noise_type.split('_')
            action_noise = OrnsteinUhlenbeckActionNoise(
                mu=np.zeros(nb_actions),
                sigma=float(stddev) * np.ones(nb_actions))
        elif 'pepsnorm' in current_noise_type:
            # Check 'pepsnorm' before 'epsnorm': the substring test for
            # 'epsnorm' would otherwise also match 'pepsnorm_...'.
            _, stddev, epsilon = current_noise_type.split('_')
            action_noise = EpsilonNormalParameterizedActionNoise(
                mu=np.zeros(my_skill_set.num_params),
                sigma=float(stddev) * np.ones(my_skill_set.num_params),
                epsilon=float(epsilon),
                discrete_actions_dim=my_skill_set.len)
        elif 'epsnorm' in current_noise_type:
            _, stddev, epsilon = current_noise_type.split('_')
            action_noise = EpsilonNormalActionNoise(mu=np.zeros(nb_actions),
                                                    sigma=float(stddev) *
                                                    np.ones(nb_actions),
                                                    epsilon=float(epsilon))
        else:
            raise RuntimeError(
                'unknown noise type "{}"'.format(current_noise_type))

    # Configure components.
    memory = Memory(limit=int(1e6),
                    action_shape=(nb_actions, ),
                    observation_shape=env.observation_space.shape)
    if kwargs['newarch']:
        critic = Critic(layer_norm=layer_norm, hidden_unit_list=[400, 300])
    elif kwargs['newcritic']:
        critic = NewCritic(layer_norm=layer_norm)
    else:
        critic = Critic(layer_norm=layer_norm)

    if kwargs['skillset'] is None:
        my_skill_set = None
        discrete_action_size = env.env.discrete_action_size
    else:
        # with a skill set, the discrete head selects a skill
        discrete_action_size = my_skill_set.len

    actor_kwargs = dict(discrete_action_size=discrete_action_size,
                        cts_action_size=nb_actions - discrete_action_size,
                        layer_norm=layer_norm)
    if kwargs['newarch']:
        actor_kwargs['hidden_unit_list'] = [400, 300]
    actor = Actor(**actor_kwargs)

    # Seed everything to make things reproducible.
    seed = seed + 1000000 * rank
    logger.info('rank {}: seed={}, logdir={}'.format(rank, seed,
                                                     logger.get_dir()))

    set_global_seeds(seed)
    env.seed(seed)
    if eval_env is not None:
        eval_env.seed(seed)

    # Record the start time on rank 0.
    if rank == 0:
        start_time = time.time()

    training.train(env=env,
                   eval_env=eval_env,
                   param_noise=param_noise,
                   action_noise=action_noise,
                   actor=actor,
                   critic=critic,
                   memory=memory,
                   my_skill_set=my_skill_set,
                   **kwargs)

    env.close()
    if eval_env is not None:
        eval_env.close()
    if rank == 0:
        logger.info('total runtime: {}s'.format(time.time() - start_time))
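
Since the noise branches above match by substring, the parse order is load-bearing ('epsnorm' is a substring of 'pepsnorm'). A sketch of an order-insensitive variant using exact name matching; parse_noise_spec is a hypothetical helper, not part of the original code:

def parse_noise_spec(spec):
    """Split a spec like 'pepsnorm_0.2_0.1' into its name and floats.

    Matching on the exact name (rather than substring containment)
    removes the ambiguity between 'epsnorm' and 'pepsnorm'.
    """
    name, _, rest = spec.strip().partition('_')
    args = [float(x) for x in rest.split('_')] if rest else []
    return name, args

# e.g. parse_noise_spec('pepsnorm_0.2_0.1') -> ('pepsnorm', [0.2, 0.1])
#      parse_noise_spec('none')             -> ('none', [])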
Example #5
def run(env_id,
        render,
        log_dir,
        restore_dir,
        commit_for,
        train_epoch,
        batch_size=32,
        lr=1e-3,
        seed=0,
        dataset_size=2000):

    env = gym.make(env_id)
    observation_shape = env.observation_space.shape[-1]
    global in_size, out_size
    in_size = observation_shape
    out_size = observation_shape - 3

    set_global_seeds(seed)
    env.seed(seed)

    with U.single_threaded_session() as sess:
        actor_model = DDPGSkill(observation_shape=(observation_shape, ),
                                skill_name="skill",
                                nb_actions=env.action_space.shape[-1],
                                restore_path=restore_dir)

        print("Assumption: Goal is 3d target location")

        pred_model = regressor(in_shape=in_size,
                               out_shape=out_size,
                               name="suc_pred_model",
                               sess=sess,
                               log_dir=log_dir)

        init_op = tf.group(
            tf.global_variables_initializer(),
            tf.local_variables_initializer(),
        )
        sess.run(init_op)

        # restore actor
        actor_model.restore_skill(path=get_home_path(
            osp.expanduser(restore_dir)),
                                  sess=sess)

        generate_data(env, env_id, log_dir, actor_model, dataset_size,
                      commit_for, render)

        ## build the dataset from the generated csv
        csv_filename = osp.join(log_dir, "%s.csv" % env_id)
        base_dataset = pd.read_csv(csv_filename)
        train, test = train_test_split(base_dataset, test_size=0.2)

        # whiten with training-set statistics
        train_mean = np.mean(train, axis=0)
        train_std = np.std(train, axis=0)

        # save mean and std for use at inference time
        statistics = np.concatenate((train_mean, train_std))
        with open(osp.join(log_dir, "%s_stat.npy" % env_id), 'wb') as f:
            np.save(f, statistics)

        # standardize both splits with the training statistics
        train_dataset = ((train - train_mean) / train_std)
        test_dataset = ((test - train_mean) / train_std)
        test_dataset = test_dataset.values
        test_dataset = [test_dataset[:, :in_size], test_dataset[:, in_size:]]

        print(train_dataset.shape, test_dataset[0].shape)
        pred_model.train(train_epoch, batch_size, lr, train_dataset,
                         test_dataset)
        pred_model.save()
Example #6
def run(env_id, seed, noise_type, layer_norm, evaluation, memory_size, factor,
        **kwargs):
    # Configure things. This entry point is single-process, so rank is
    # fixed at 0 and logging stays enabled.
    rank = 0

    dologging = kwargs["dologging"]

    # Create envs.
    env = gym.make(env_id)
    gym.logger.setLevel(logging.WARN)

    if evaluation and rank == 0:
        eval_env = gym.make(env_id)
    else:
        eval_env = None

    # Parse noise_type
    action_noise = None
    param_noise = None
    nb_actions = env.action_space.shape[-1]
    for current_noise_type in noise_type.split(','):
        current_noise_type = current_noise_type.strip()
        if current_noise_type == 'none':
            pass
        elif 'adaptive-param' in current_noise_type:
            _, stddev = current_noise_type.split('_')
            param_noise = AdaptiveParamNoiseSpec(
                initial_stddev=float(stddev),
                desired_action_stddev=float(stddev))
        elif 'normal' in current_noise_type:
            _, stddev = current_noise_type.split('_')
            action_noise = NormalActionNoise(mu=np.zeros(nb_actions),
                                             sigma=float(stddev) *
                                             np.ones(nb_actions))
        elif 'ou' in current_noise_type:
            _, stddev = current_noise_type.split('_')
            action_noise = OrnsteinUhlenbeckActionNoise(
                mu=np.zeros(nb_actions),
                sigma=float(stddev) * np.ones(nb_actions))
        else:
            raise RuntimeError(
                'unknown noise type "{}"'.format(current_noise_type))

    # Configure components.

    single_train = False

    ospace = env.observation_space
    has_image = (not hasattr(ospace, 'shape')) or (not ospace.shape)

    if has_image:
        # Tuple observation spaces (e.g. image + state): expose the
        # component shapes as a list so downstream code can size buffers.
        assert isinstance(env.observation_space, gym.spaces.Tuple)
        env.observation_space.shape = [
            x.shape for x in env.observation_space.spaces
        ]

    if rank == 0 or not single_train:
        memory = Memory(limit=memory_size,
                        action_shape=env.action_space.shape,
                        observation_shape=env.observation_space.shape)
    else:
        memory = None

    if has_image:
        ignore = False
        if ignore:
            critic = IgnoreDepthCritic(layer_norm=layer_norm)
            actor = IgnoreDepthActor(nb_actions, layer_norm=layer_norm)
        else:
            critic = DepthCritic(layer_norm=layer_norm)
            if factor:
                actor = FactoredDepthActor(nb_actions, layer_norm=layer_norm)
            else:
                actor = DepthActor(nb_actions, layer_norm=layer_norm)
    else:
        critic = Critic(layer_norm=layer_norm)
        actor = Actor(nb_actions, layer_norm=layer_norm)

    # Seed everything to make things reproducible.
    seed = seed + 1000000 * rank
    logger.info('rank {}: seed={}, logdir={}'.format(rank, seed,
                                                     logger.get_dir()))
    tf.reset_default_graph()
    set_global_seeds(seed)
    env.seed(seed)
    if eval_env is not None:
        eval_env.seed(seed)

    # Record the start time.
    if rank == 0:
        start_time = time.time()

    testing.test(env=env,
                 eval_env=eval_env,
                 param_noise=param_noise,
                 action_noise=action_noise,
                 actor=actor,
                 critic=critic,
                 memory=memory,
                 **kwargs)

    env.close()
    if eval_env is not None:
        eval_env.close()
    if rank == 0:
        logger.info('total runtime: {}s'.format(time.time() - start_time))
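
A minimal illustration of the Tuple observation-space patch used above, with made-up Box spaces standing in for an image plus a state vector:

import gym
import numpy as np

# Hypothetical image + state observation space, as the patch above assumes.
space = gym.spaces.Tuple((
    gym.spaces.Box(low=0, high=255, shape=(84, 84, 3), dtype=np.uint8),
    gym.spaces.Box(low=-1.0, high=1.0, shape=(10,), dtype=np.float32),
))
# The patch exposes one shape per component so Memory can size its buffers.
shapes = [x.shape for x in space.spaces]
print(shapes)  # [(84, 84, 3), (10,)]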
Example #7
def run(env_id,
        render,
        log_dir,
        train_epoch,
        batch_size=32,
        lr=1e-3,
        seed=0,
        whiten=False):

    env = gym.make(env_id)
    observation_shape = env.observation_space.shape[-1]
    global in_size, out_size
    in_size = observation_shape
    out_size = observation_shape - 3  # target excludes the 3-d goal location

    set_global_seeds(seed)
    # env.seed(seed)

    with U.single_threaded_session() as sess:

        ## load the dataset generated for this env
        csv_filename = osp.join(log_dir, "%s.csv" % env_id)
        base_dataset = np.loadtxt(csv_filename, delimiter=',')
        train, test = train_test_split(base_dataset, test_size=0.2)

        # nearest-neighbour baseline error for reference
        nn_error = get_nn_error(train, test, in_size)
        print("memory based nn error", nn_error)

        # whiten with training-set statistics
        if whiten:
            train_feat_mean = np.mean(train, axis=0)
            train_feat_std = np.std(train, axis=0)

            # save mean and std for use at inference time
            statistics = np.concatenate((train_feat_mean, train_feat_std))
            with open(osp.join(log_dir, "%s_stat.npy" % env_id), 'wb') as f:
                np.save(f, statistics)

            # standardize both splits with the training statistics
            train_dataset = ((train - train_feat_mean) /
                             (train_feat_std + eps))
            train_dataset = pd.DataFrame(train_dataset)

            test_dataset = ((test - train_feat_mean) / (train_feat_std + eps))

            print(train_dataset.shape, test_dataset.shape)
            whiten_data = [train_feat_mean[in_size:], train_feat_std[in_size:]]
        else:
            train_dataset = pd.DataFrame(train)
            test_dataset = test
            whiten_data = None

        pred_model = regressor(in_shape=in_size,
                               out_shape=out_size,
                               name="succmodel",
                               sess=sess,
                               log_dir=log_dir,
                               whiten_data=whiten_data)

        init_op = tf.group(
            tf.global_variables_initializer(),
            tf.local_variables_initializer(),
        )
        sess.run(init_op)

        pred_model.train(train_epoch, batch_size, lr, train_dataset,
                         test_dataset)
        pred_model.save()
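
The '%s_stat.npy' file written above concatenates the training mean and std; a sketch of recovering and applying them at inference time (load_whitening_stats is a hypothetical helper, and the equal-halves split follows from how statistics is built above):

import numpy as np

def load_whitening_stats(path):
    # statistics = np.concatenate((mean, std)), so the halves are equal.
    stats = np.load(path)
    mean, std = np.split(stats, 2)
    return mean, std

# usage sketch (path and eps mirror the training code above):
# mean, std = load_whitening_stats('%s_stat.npy' % env_id)
# x_whitened = (x - mean) / (std + eps)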
Example #8
def run(env_id,
        render,
        log_dir,
        train_epoch,
        batch_size=32,
        lr=1e-3,
        seed=0,
        whiten=False):

    env = gym.make(env_id)
    observation_shape = env.observation_space.shape[-1]
    global in_size, out_size
    in_size = observation_shape
    out_size = 1

    set_global_seeds(seed)
    env.seed(seed)

    with U.single_threaded_session() as sess:

        pred_model = classifier(in_shape=in_size,
                                out_shape=out_size,
                                name="suc_pred_model",
                                sess=sess,
                                log_dir=log_dir)

        init_op = tf.group(
            tf.global_variables_initializer(),
            tf.local_variables_initializer(),
        )
        sess.run(init_op)

        ## load the labelled dataset (label in the last column)
        csv_filename = osp.join(log_dir, "%s_data.csv" % env_id)
        base_dataset = np.loadtxt(csv_filename, delimiter=',')
        train, test = train_test_split(base_dataset, test_size=0.2)
        train_feat = train[:, :-1]
        train_labels = train[:, -1]

        # whiten features with training-set statistics
        if whiten:
            train_feat_mean = np.mean(train_feat, axis=0)
            train_feat_std = np.std(train_feat, axis=0)

            # save mean and std for use at inference time
            statistics = np.concatenate((train_feat_mean, train_feat_std))
            with open(osp.join(log_dir, "%s_stat.npy" % env_id), 'wb') as f:
                np.save(f, statistics)

            # standardize features and re-attach the labels
            train_feat_dataset = ((train_feat - train_feat_mean) /
                                  train_feat_std)
            print(train_feat_dataset.shape, train_labels[:, np.newaxis].shape)
            train_dataset = pd.DataFrame(
                np.concatenate(
                    (train_feat_dataset, train_labels[:, np.newaxis]), axis=1))

            test_feat_dataset = ((test[:, :-1] - train_feat_mean) /
                                 train_feat_std)
            test_dataset = [test_feat_dataset, test[:, [-1]]]

            print(train_dataset.shape, test_dataset[0].shape)
        else:
            train_dataset = pd.DataFrame(
                np.concatenate((train_feat, train_labels[:, np.newaxis]),
                               axis=1))
            test_dataset = [test[:, :-1], test[:, [-1]]]

        pred_model.train(train_epoch, batch_size, lr, train_dataset,
                         test_dataset)
        pred_model.save()
Example #9
def run(env_id, seed, evaluation, **kwargs):
    
    # Create envs.
    env = gym.make(env_id)

    logger.info("Env info")
    logger.info(env.__doc__)
    logger.info("-" * 20)
    gym.logger.setLevel(logging.WARN)

    if evaluation:
        if kwargs['eval_env_id']: 
            eval_env_id = kwargs['eval_env_id']
        else: 
            eval_env_id = env_id
        eval_env = gym.make(eval_env_id)
        # consume eval_env_id so it is not forwarded again via **kwargs
        del kwargs['eval_env_id']
    else:
        eval_env = None

    
    if kwargs['skillset']:
        skillset_file = __import__("HER.skills.%s" % kwargs['skillset'],
                                   fromlist=[''])
        my_skill_set = SkillSet(skillset_file.skillset)
    else:
        # my_skill_set is passed to training.train below, so it must be
        # defined even when no skill set is configured.
        my_skill_set = None

    model = models.mlp([64])

    # Seed everything to make things reproducible.
    logger.info('seed={}, logdir={}'.format(seed, logger.get_dir()))
    
    set_global_seeds(seed)
    env.seed(seed)
    if eval_env is not None:
        eval_env.seed(seed)

    start_time = time.time()
    
    training.train(
        env=env,
        eval_env=eval_env,
        q_func=model,
        lr=kwargs['lr'],
        max_timesteps=kwargs['num_timesteps'],
        buffer_size=50000,
        exploration_fraction=0.1,
        exploration_final_eps=0.002,
        train_freq=1,
        batch_size=kwargs['batch_size'],
        print_freq=100,
        checkpoint_freq=kwargs['save_freq'],
        learning_starts=max(50, kwargs['batch_size']),
        target_network_update_freq=100,
        prioritized_replay=kwargs['prioritized_replay'],
        prioritized_replay_alpha=0.6,
        prioritized_replay_beta0=0.4,
        prioritized_replay_beta_iters=None,
        prioritized_replay_eps=1e-6,
        param_noise=False,
        gamma=kwargs['gamma'],
        log_dir=kwargs['log_dir'],
        my_skill_set=my_skill_set,
        num_eval_episodes=kwargs['num_eval_episodes'],
        render=kwargs['render'],
        render_eval=kwargs['render_eval'],
        commit_for=kwargs['commit_for'])
    
    env.close()
    if eval_env is not None:
        eval_env.close()
    
    logger.info('total runtime: {}s'.format(time.time() - start_time))
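
On the exploration settings above: assuming the usual baselines-style linear schedule, epsilon anneals from 1.0 to exploration_final_eps over the first exploration_fraction of max_timesteps, then stays flat.

# Assumed baselines-style schedule for the settings above:
#   anneal_steps = exploration_fraction * max_timesteps  # first 10% of training
#   epsilon: 1.0 -> 0.002 linearly over anneal_steps, then held at 0.002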
Example #10
def run(env_id, seed, noise_type, layer_norm, evaluation, **kwargs):
    # Configure things. This entry point is single-process, so rank is
    # fixed at 0 and logging stays enabled.
    rank = 0

    dologging = kwargs["dologging"]

    # Create envs.
    env = gym.make(env_id)
    gym.logger.setLevel(logging.WARN)

    if evaluation and rank == 0:
        eval_env = gym.make(env_id)
    else:
        eval_env = None

    tf.reset_default_graph()

    if kwargs['skillset']:
        skillset_file = __import__("HER.skills.%s" % kwargs['skillset'],
                                   fromlist=[''])
        my_skill_set = SkillSet(skillset_file.skillset)
        nb_actions = my_skill_set.params + my_skill_set.len

    else:
        nb_actions = env.action_space.shape[-1]

    # Configure components. The replay buffer must store the derived
    # action dimension, which differs from env.action_space when a
    # skill set is in use.
    memory = Memory(limit=int(1e6),
                    action_shape=(nb_actions, ),
                    observation_shape=env.observation_space.shape)
    critic = Critic(layer_norm=layer_norm)

    if kwargs['skillset'] is None:
        actor = Actor(discrete_action_size=env.env.discrete_action_size,
                      cts_action_size=nb_actions -
                      env.env.discrete_action_size,
                      layer_norm=layer_norm)
        my_skill_set = None
    else:
        # with a skill set, the discrete head selects a skill
        actor = Actor(discrete_action_size=my_skill_set.len,
                      cts_action_size=nb_actions - my_skill_set.len,
                      layer_norm=layer_norm)

    # Seed everything to make things reproducible.
    seed = seed + 1000000 * rank
    logger.info('rank {}: seed={}, logdir={}'.format(rank, seed,
                                                     logger.get_dir()))

    set_global_seeds(seed)
    env.seed(seed)
    if eval_env is not None:
        eval_env.seed(seed)

    # Record the start time.
    if rank == 0:
        start_time = time.time()

    testing.test(env=env,
                 eval_env=eval_env,
                 param_noise=None,
                 action_noise=None,
                 actor=actor,
                 critic=critic,
                 memory=memory,
                 my_skill_set=my_skill_set,
                 **kwargs)
    env.close()
    if eval_env is not None:
        eval_env.close()
    if rank == 0:
        logger.info('total runtime: {}s'.format(time.time() - start_time))