示例#1
0
def run(env_id, seed, noise_type, layer_norm, evaluation, **kwargs):
    """Build the envs, noise processes and networks, then call training.

    Args:
        env_id: Gym environment id passed to ``gym.make`` for training.
        seed: Base random seed; offset by ``1000000 * rank`` per MPI process.
        noise_type: Comma-separated noise specs. Recognised forms are
            ``none``, ``adaptive-param_<stddev>``, ``normal_<stddev>``,
            ``ou_<stddev>`` and ``epsnorm_<stddev>_<epsilon>``.
        layer_norm: Forwarded to the ``Actor`` and ``Critic`` constructors.
        evaluation: When truthy, rank 0 also creates an evaluation env.
        **kwargs: Must contain ``look_ahead`` and ``skillset``; may contain
            ``eval_env_id``. Everything except ``eval_env_id`` is forwarded
            to ``training.train``.

    Raises:
        RuntimeError: If a noise spec is not recognised.
    """
    # Only rank 0 keeps logging enabled.
    rank = MPI.COMM_WORLD.Get_rank()
    if rank != 0:
        logger.set_level(logger.DISABLED)

    # Create envs.
    env = gym.make(env_id)
    logger.debug("Env info")
    logger.debug(env.__doc__)
    logger.debug("-" * 20)
    gym.logger.setLevel(logging.WARN)

    # Always strip eval_env_id from kwargs so that every rank (and runs
    # without evaluation) forwards the same keyword set to training.train.
    requested_eval_env_id = kwargs.pop('eval_env_id', None)
    if evaluation and rank == 0:
        # Fall back to the training env id when no eval id was supplied.
        eval_env = gym.make(requested_eval_env_id or env_id)
    else:
        eval_env = None

    # Parse noise_type
    action_noise = None
    param_noise = None
    nb_actions = env.action_space.shape[-1]
    for current_noise_type in noise_type.split(','):
        current_noise_type = current_noise_type.strip()
        if current_noise_type == 'none':
            pass
        elif 'adaptive-param' in current_noise_type:
            _, stddev = current_noise_type.split('_')
            param_noise = AdaptiveParamNoiseSpec(
                initial_stddev=float(stddev),
                desired_action_stddev=float(stddev))
        elif 'normal' in current_noise_type:
            _, stddev = current_noise_type.split('_')
            action_noise = NormalActionNoise(mu=np.zeros(nb_actions),
                                             sigma=float(stddev) *
                                             np.ones(nb_actions))
        elif 'ou' in current_noise_type:
            _, stddev = current_noise_type.split('_')
            action_noise = OrnsteinUhlenbeckActionNoise(
                mu=np.zeros(nb_actions),
                sigma=float(stddev) * np.ones(nb_actions))
        elif 'epsnorm' in current_noise_type:
            _, stddev, epsilon = current_noise_type.split('_')
            action_noise = EpsilonNormalActionNoise(mu=np.zeros(nb_actions),
                                                    sigma=float(stddev) *
                                                    np.ones(nb_actions),
                                                    epsilon=float(epsilon))
        else:
            raise RuntimeError(
                'unknown noise type "{}"'.format(current_noise_type))

    # Configure components.
    memory = Memory(limit=int(1e6),
                    action_shape=env.action_space.shape,
                    observation_shape=env.observation_space.shape)
    critic = Critic(layer_norm=layer_norm)
    actor = Actor(nb_actions, layer_norm=layer_norm)

    # Seed everything to make things reproducible; each rank gets its own
    # seed derived from the base seed.
    seed = seed + 1000000 * rank
    tf.reset_default_graph()

    # Import the requested skill configuration module by name.
    if kwargs['look_ahead'] and kwargs['skillset']:
        skillset_file = __import__("HER.skills.%s" % kwargs['skillset'],
                                   fromlist=[''])
        my_skill_set = SkillSet(skillset_file.skillset)
    else:
        my_skill_set = None

    set_global_seeds(seed)
    env.seed(seed)
    if eval_env is not None:
        eval_env.seed(seed)

    # Only rank 0 reports the seed and measures wall-clock time.
    if rank == 0:
        logger.info('rank {}: seed={}, logdir={}'.format(
            rank, seed, logger.get_dir()))
        start_time = time.time()
    try:
        training.train(env=env,
                       eval_env=eval_env,
                       param_noise=param_noise,
                       action_noise=action_noise,
                       actor=actor,
                       critic=critic,
                       memory=memory,
                       my_skill_set=my_skill_set,
                       **kwargs)
    finally:
        # Release the environments even when training fails or is
        # interrupted.
        env.close()
        if eval_env is not None:
            eval_env.close()
    if rank == 0:
        logger.info('total runtime: {}s'.format(time.time() - start_time))
示例#2
0
def run(env_id, seed, noise_type, layer_norm, evaluation, **kwargs):
    """Build monitored envs, noise processes and networks, then train.

    Args:
        env_id: Gym environment id used for both training and evaluation.
        seed: Base random seed; offset by ``1000000 * rank`` per MPI process.
        noise_type: Comma-separated noise specs. Recognised forms are
            ``none``, ``adaptive-param_<stddev>``, ``normal_<stddev>`` and
            ``ou_<stddev>``.
        layer_norm: Forwarded to the ``Actor`` and ``Critic`` constructors.
        evaluation: When truthy, rank 0 also creates an evaluation env.
        **kwargs: Forwarded unchanged to ``training.train``.

    Raises:
        RuntimeError: If a noise spec is not recognised.
    """
    # Only rank 0 keeps logging enabled.
    rank = MPI.COMM_WORLD.Get_rank()
    if rank != 0:
        logger.set_level(logger.DISABLED)

    # Create envs. The monitor path is None when no log directory is set.
    log_dir = logger.get_dir()
    env = gym.make(env_id)
    env = bench.Monitor(env, log_dir and os.path.join(log_dir, str(rank)))
    gym.logger.setLevel(logging.WARN)

    if evaluation and rank == 0:
        eval_env = gym.make(env_id)
        # Guard against a missing log directory exactly as the training
        # monitor does; os.path.join(None, ...) would raise TypeError.
        eval_env = bench.Monitor(
            eval_env, log_dir and os.path.join(log_dir, 'gym_eval'))
    else:
        eval_env = None

    # Parse noise_type
    action_noise = None
    param_noise = None
    nb_actions = env.action_space.shape[-1]
    for current_noise_type in noise_type.split(','):
        current_noise_type = current_noise_type.strip()
        if current_noise_type == 'none':
            pass
        elif 'adaptive-param' in current_noise_type:
            _, stddev = current_noise_type.split('_')
            param_noise = AdaptiveParamNoiseSpec(
                initial_stddev=float(stddev),
                desired_action_stddev=float(stddev))
        elif 'normal' in current_noise_type:
            _, stddev = current_noise_type.split('_')
            action_noise = NormalActionNoise(mu=np.zeros(nb_actions),
                                             sigma=float(stddev) *
                                             np.ones(nb_actions))
        elif 'ou' in current_noise_type:
            _, stddev = current_noise_type.split('_')
            action_noise = OrnsteinUhlenbeckActionNoise(
                mu=np.zeros(nb_actions),
                sigma=float(stddev) * np.ones(nb_actions))
        else:
            raise RuntimeError(
                'unknown noise type "{}"'.format(current_noise_type))

    # Configure components.
    memory = Memory(limit=int(1e6),
                    action_shape=env.action_space.shape,
                    observation_shape=env.observation_space.shape)
    critic = Critic(layer_norm=layer_norm)
    actor = Actor(nb_actions, layer_norm=layer_norm)

    # Seed everything to make things reproducible; each rank gets its own
    # seed derived from the base seed.
    seed = seed + 1000000 * rank
    logger.info('rank {}: seed={}, logdir={}'.format(rank, seed,
                                                     logger.get_dir()))
    tf.reset_default_graph()
    set_global_seeds(seed)
    env.seed(seed)
    if eval_env is not None:
        eval_env.seed(seed)

    # Only rank 0 measures and reports wall-clock time.
    if rank == 0:
        start_time = time.time()
    try:
        training.train(env=env,
                       eval_env=eval_env,
                       param_noise=param_noise,
                       action_noise=action_noise,
                       actor=actor,
                       critic=critic,
                       memory=memory,
                       **kwargs)
    finally:
        # Release the environments even when training fails or is
        # interrupted.
        env.close()
        if eval_env is not None:
            eval_env.close()
    if rank == 0:
        logger.info('total runtime: {}s'.format(time.time() - start_time))
def run(env_id, seed, noise_type, layer_norm, evaluation, **kwargs):
    """Build envs, an optional skill set, noise and networks, then train.

    Args:
        env_id: Gym environment id passed to ``gym.make`` for training.
        seed: Base random seed; offset by ``1000000 * rank`` per MPI process.
        noise_type: Comma-separated noise specs. Recognised forms are
            ``none``, ``adaptive-param_<stddev>``, ``normal_<stddev>``,
            ``ou_<stddev>``, ``epsnorm_<stddev>_<epsilon>`` and
            ``pepsnorm_<stddev>_<epsilon>`` (the last one needs a skill set).
        layer_norm: Forwarded to the actor and critic constructors.
        evaluation: When truthy, rank 0 also creates an evaluation env.
        **kwargs: Must contain ``skillset`` and ``newarch``; ``newcritic``
            is read when ``newarch`` is falsy; may contain ``eval_env_id``.
            Everything except ``eval_env_id`` is forwarded to
            ``training.train``.

    Raises:
        RuntimeError: If a noise spec is not recognised, or ``pepsnorm`` is
            requested without a skill set.
    """
    # Only rank 0 keeps logging enabled.
    rank = MPI.COMM_WORLD.Get_rank()
    if rank != 0:
        logger.set_level(logger.DISABLED)

    # Create envs.
    env = gym.make(env_id)

    logger.info("Env info")
    logger.info(env.__doc__)
    logger.info("-" * 20)
    gym.logger.setLevel(logging.WARN)

    # Always strip eval_env_id from kwargs so that every rank (and runs
    # without evaluation) forwards the same keyword set to training.train.
    requested_eval_env_id = kwargs.pop('eval_env_id', None)
    if evaluation and rank == 0:
        # Fall back to the training env id when no eval id was supplied.
        eval_env = gym.make(requested_eval_env_id or env_id)
    else:
        eval_env = None

    action_noise = None
    param_noise = None

    tf.reset_default_graph()

    # HACK: with a skill set the action vector is sized from the skill set
    # instead of from the env's action space.
    # my_skill_set is bound up front so that a falsy-but-not-None skillset
    # value (e.g. '') cannot leave it undefined further down.
    my_skill_set = None
    if kwargs['skillset']:
        skillset_file = __import__("HER.skills.%s" % kwargs['skillset'],
                                   fromlist=[''])
        my_skill_set = SkillSet(skillset_file.skillset)
        nb_actions = my_skill_set.params + my_skill_set.len
    else:
        nb_actions = env.action_space.shape[-1]

    # Parse noise_type
    for current_noise_type in noise_type.split(','):
        current_noise_type = current_noise_type.strip()
        if current_noise_type == 'none':
            pass
        elif 'adaptive-param' in current_noise_type:
            _, stddev = current_noise_type.split('_')
            param_noise = AdaptiveParamNoiseSpec(
                initial_stddev=float(stddev),
                desired_action_stddev=float(stddev))
        elif 'normal' in current_noise_type:
            _, stddev = current_noise_type.split('_')
            action_noise = NormalActionNoise(mu=np.zeros(nb_actions),
                                             sigma=float(stddev) *
                                             np.ones(nb_actions))
        elif 'ou' in current_noise_type:
            _, stddev = current_noise_type.split('_')
            action_noise = OrnsteinUhlenbeckActionNoise(
                mu=np.zeros(nb_actions),
                sigma=float(stddev) * np.ones(nb_actions))
        elif 'pepsnorm' in current_noise_type:
            # Must be tested before 'epsnorm': 'epsnorm' is a substring of
            # 'pepsnorm', so the opposite order makes this branch
            # unreachable.
            if my_skill_set is None:
                raise RuntimeError(
                    'noise type "{}" requires a skillset'.format(
                        current_noise_type))
            _, stddev, epsilon = current_noise_type.split('_')
            # NOTE(review): this reads ``num_params`` while nb_actions above
            # uses ``params`` -- confirm both attributes exist on SkillSet.
            action_noise = EpsilonNormalParameterizedActionNoise(
                mu=np.zeros(my_skill_set.num_params),
                sigma=float(stddev) * np.ones(my_skill_set.num_params),
                epsilon=float(epsilon),
                discrete_actions_dim=my_skill_set.len)
        elif 'epsnorm' in current_noise_type:
            _, stddev, epsilon = current_noise_type.split('_')
            action_noise = EpsilonNormalActionNoise(mu=np.zeros(nb_actions),
                                                    sigma=float(stddev) *
                                                    np.ones(nb_actions),
                                                    epsilon=float(epsilon))
        else:
            raise RuntimeError(
                'unknown noise type "{}"'.format(current_noise_type))

    # Configure components.
    memory = Memory(limit=int(1e6),
                    action_shape=(nb_actions, ),
                    observation_shape=env.observation_space.shape)
    if kwargs['newarch']:
        critic = Critic(layer_norm=layer_norm, hidden_unit_list=[400, 300])
    elif kwargs['newcritic']:
        critic = NewCritic(layer_norm=layer_norm)
    else:
        critic = Critic(layer_norm=layer_norm)

    # The discrete part of the action comes from the env when there is no
    # skill set, otherwise from the skill set; the remainder is continuous.
    if my_skill_set is None:
        discrete_action_size = env.env.discrete_action_size
    else:
        discrete_action_size = my_skill_set.len
    actor_kwargs = dict(discrete_action_size=discrete_action_size,
                        cts_action_size=nb_actions - discrete_action_size,
                        layer_norm=layer_norm)
    if kwargs['newarch']:
        actor_kwargs['hidden_unit_list'] = [400, 300]
    actor = Actor(**actor_kwargs)

    # Seed everything to make things reproducible; each rank gets its own
    # seed derived from the base seed.
    seed = seed + 1000000 * rank
    logger.info('rank {}: seed={}, logdir={}'.format(rank, seed,
                                                     logger.get_dir()))

    set_global_seeds(seed)
    env.seed(seed)
    if eval_env is not None:
        eval_env.seed(seed)

    # Only rank 0 measures and reports wall-clock time.
    if rank == 0:
        start_time = time.time()

    try:
        training.train(env=env,
                       eval_env=eval_env,
                       param_noise=param_noise,
                       action_noise=action_noise,
                       actor=actor,
                       critic=critic,
                       memory=memory,
                       my_skill_set=my_skill_set,
                       **kwargs)
    finally:
        # Release the environments even when training fails or is
        # interrupted.
        env.close()
        if eval_env is not None:
            eval_env.close()
    if rank == 0:
        logger.info('total runtime: {}s'.format(time.time() - start_time))
示例#4
0
    boolean_flag(parser, 'look-ahead', default=True)
    parser.add_argument('--exploration-final-eps', type=float, default=0.001)
    parser.add_argument('--num-samples', type=int, default=5)

    boolean_flag(parser, 'select-action', default=True)

    args = parser.parse_args()
    # we don't directly specify timesteps for this script, so make sure that if we do specify them
    # they agree with the other parameters
    if args.num_timesteps is not None:
        assert (args.num_timesteps == args.nb_epochs * args.nb_epoch_cycles *
                args.nb_rollout_steps)
    dict_args = vars(args)
    del dict_args['num_timesteps']
    return dict_args


if __name__ == '__main__':
    # Command-line options as a plain dict of keyword arguments for run().
    args = parse_args()

    # Only the rank-0 MPI process configures the logger and echoes the
    # parsed arguments.
    is_root_process = MPI.COMM_WORLD.Get_rank() == 0
    if is_root_process:
        logger.configure(dir=args["log_dir"])
        logger.set_level(logger.DEBUG)
        logger.info(str(args))

    # Run actual script; Ctrl-C ends the run with a short message instead
    # of a traceback.
    try:
        run(**args)
    except KeyboardInterrupt:
        print("Exiting!")
示例#5
0
def run(env_id, seed, noise_type, layer_norm, evaluation, memory_size, factor,
        **kwargs):
    """Set up envs, noise, replay memory and networks, then run testing.

    Args:
        env_id: Gym environment id used for both the main and the eval env.
        seed: Base random seed (offset by ``1000000 * rank``; rank is 0).
        noise_type: Comma-separated noise specs. Recognised forms are
            ``none``, ``adaptive-param_<stddev>``, ``normal_<stddev>`` and
            ``ou_<stddev>``.
        layer_norm: Forwarded to the actor and critic constructors.
        evaluation: When truthy, a second env is created for evaluation.
        memory_size: Capacity passed to ``Memory`` as ``limit``.
        factor: When truthy and the observation space is a tuple, a
            ``FactoredDepthActor`` is used instead of ``DepthActor``.
        **kwargs: Must contain ``dologging``; forwarded unchanged to
            ``testing.test``.

    Raises:
        RuntimeError: If a noise spec is not recognised.
    """
    # This launcher is single-process: rank is fixed at 0, so the guard
    # below never fires.
    rank = 0
    if rank != 0:
        logger.set_level(logger.DISABLED)

    # Not used in this function; the lookup raises KeyError when the key is
    # missing.
    dologging = kwargs["dologging"]

    # Create envs.
    env = gym.make(env_id)
    gym.logger.setLevel(logging.WARN)

    eval_env = None
    if evaluation and rank == 0:
        eval_env = gym.make(env_id)

    # Translate each noise spec into a parameter-noise or action-noise
    # object.
    action_noise = None
    param_noise = None
    nb_actions = env.action_space.shape[-1]
    for raw_spec in noise_type.split(','):
        spec = raw_spec.strip()
        if spec == 'none':
            continue
        if 'adaptive-param' in spec:
            _, stddev = spec.split('_')
            param_noise = AdaptiveParamNoiseSpec(
                initial_stddev=float(stddev),
                desired_action_stddev=float(stddev))
            continue
        if 'normal' in spec:
            _, stddev = spec.split('_')
            action_noise = NormalActionNoise(
                mu=np.zeros(nb_actions),
                sigma=float(stddev) * np.ones(nb_actions))
            continue
        if 'ou' in spec:
            _, stddev = spec.split('_')
            action_noise = OrnsteinUhlenbeckActionNoise(
                mu=np.zeros(nb_actions),
                sigma=float(stddev) * np.ones(nb_actions))
            continue
        raise RuntimeError('unknown noise type "{}"'.format(spec))

    # Configure components.
    single_train = False

    # An observation space with no (or an empty) ``shape`` is taken to be a
    # Tuple space; it gets a synthetic ``shape`` listing each sub-space's
    # shape.
    ospace = env.observation_space
    has_image = not (hasattr(ospace, 'shape') and ospace.shape)
    if has_image:
        assert isinstance(env.observation_space, gym.spaces.Tuple)
        env.observation_space.shape = [
            sub_space.shape for sub_space in env.observation_space.spaces
        ]

    memory = None
    if rank == 0 or not single_train:
        memory = Memory(limit=memory_size,
                        action_shape=env.action_space.shape,
                        observation_shape=env.observation_space.shape)

    if not has_image:
        critic = Critic(layer_norm=layer_norm)
        actor = Actor(nb_actions, layer_norm=layer_norm)
    else:
        # Manual switch between the depth-ignoring and depth-aware networks.
        ignore = False
        if ignore:
            critic = IgnoreDepthCritic(layer_norm=layer_norm)
            actor = IgnoreDepthActor(nb_actions, layer_norm=layer_norm)
        else:
            critic = DepthCritic(layer_norm=layer_norm)
            actor_cls = FactoredDepthActor if factor else DepthActor
            actor = actor_cls(nb_actions, layer_norm=layer_norm)

    # Seed everything to make things reproducible.
    seed = seed + 1000000 * rank
    logger.info('rank {}: seed={}, logdir={}'.format(rank, seed,
                                                     logger.get_dir()))
    tf.reset_default_graph()
    set_global_seeds(seed)
    env.seed(seed)
    if eval_env is not None:
        # The evaluation env uses a fixed seed, independent of ``seed``.
        eval_env.seed(6)

    # Rank 0 measures and reports wall-clock time.
    if rank == 0:
        start_time = time.time()

    testing.test(env=env,
                 eval_env=eval_env,
                 param_noise=param_noise,
                 action_noise=action_noise,
                 actor=actor,
                 critic=critic,
                 memory=memory,
                 **kwargs)

    env.close()
    if eval_env is not None:
        eval_env.close()
    if rank == 0:
        elapsed = time.time() - start_time
        logger.info('total runtime: {}s'.format(elapsed))
示例#6
0
def run(env_id, seed, noise_type, layer_norm, evaluation, **kwargs):
    """Set up envs, an optional skill set and networks, then run testing.

    Args:
        env_id: Gym environment id used for both the main and the eval env.
        seed: Base random seed (offset by ``1000000 * rank``; rank is 0).
        noise_type: Accepted for interface compatibility; not used here,
            testing always runs with no parameter or action noise.
        layer_norm: Forwarded to the ``Actor`` and ``Critic`` constructors.
        evaluation: When truthy, a second env is created for evaluation.
        **kwargs: Must contain ``dologging`` and ``skillset``; forwarded
            unchanged to ``testing.test``.
    """
    # This launcher is single-process: rank is fixed at 0, so the guard
    # below never fires.
    rank = 0
    if rank != 0:
        logger.set_level(logger.DISABLED)

    # Not used in this function; the lookup raises KeyError when the key is
    # missing.
    dologging = kwargs["dologging"]

    # Create envs.
    env = gym.make(env_id)
    gym.logger.setLevel(logging.WARN)

    if evaluation and rank == 0:
        eval_env = gym.make(env_id)
    else:
        eval_env = None

    tf.reset_default_graph()

    # my_skill_set is bound up front so that a falsy-but-not-None skillset
    # value (e.g. '') cannot leave it undefined when the actor is built.
    my_skill_set = None
    if kwargs['skillset']:
        skillset_file = __import__("HER.skills.%s" % kwargs['skillset'],
                                   fromlist=[''])
        my_skill_set = SkillSet(skillset_file.skillset)
        nb_actions = my_skill_set.params + my_skill_set.len
    else:
        nb_actions = env.action_space.shape[-1]

    # Configure components.
    # NOTE(review): the memory is sized from the env's action space even
    # when nb_actions comes from the skill set -- confirm this is intended.
    memory = Memory(limit=int(1e6),
                    action_shape=env.action_space.shape,
                    observation_shape=env.observation_space.shape)
    critic = Critic(layer_norm=layer_norm)

    # The discrete part of the action comes from the env when there is no
    # skill set, otherwise from the skill set; the remainder is continuous.
    if my_skill_set is None:
        discrete_action_size = env.env.discrete_action_size
    else:
        discrete_action_size = my_skill_set.len
    actor = Actor(discrete_action_size=discrete_action_size,
                  cts_action_size=nb_actions - discrete_action_size,
                  layer_norm=layer_norm)

    # Seed everything to make things reproducible.
    seed = seed + 1000000 * rank
    logger.info('rank {}: seed={}, logdir={}'.format(rank, seed,
                                                     logger.get_dir()))

    set_global_seeds(seed)
    env.seed(seed)
    if eval_env is not None:
        eval_env.seed(seed)

    # Rank 0 measures and reports wall-clock time.
    if rank == 0:
        start_time = time.time()

    try:
        testing.test(env=env,
                     eval_env=eval_env,
                     param_noise=None,
                     action_noise=None,
                     actor=actor,
                     critic=critic,
                     memory=memory,
                     my_skill_set=my_skill_set,
                     **kwargs)
    finally:
        # Release the environments even when testing fails or is
        # interrupted.
        env.close()
        if eval_env is not None:
            eval_env.close()
    if rank == 0:
        logger.info('total runtime: {}s'.format(time.time() - start_time))