Example #1
def make_env_all_params(rank, add_monitor, args):
    if args["env_kind"] == 'atari':
        env = gym.make(args['env'])
        assert 'NoFrameskip' in env.spec.id
        env = NoopResetEnv(env, noop_max=args['noop_max'])
        env = MaxAndSkipEnv(env, skip=4)
        env = ProcessFrame84(env, crop=False)
        env = FrameStack(env, 4)
        env = ExtraTimeLimit(env, args['max_episode_steps'])
        if 'Montezuma' in args['env']:
            env = MontezumaInfoWrapper(env)
        env = AddRandomStateToInfo(env)
    elif args["env_kind"] == 'field':
        import gym_fieldedmove
        env = gym.make('FieldedMove-v0')
        # env = FrameStack(env, 4)
    elif args["env_kind"] == "ple":
        import gym_ple
        env = gym.make(args['env'])
        env._max_episode_steps = args['max_episode_steps']
        # env = MaxAndSkipEnv(env, skip=4)
        env = ProcessFrame84(env, crop=False)
        env = FrameStack(env, 4)

    if add_monitor:
        env = Monitor(env, osp.join(logger.get_dir(), '%.2i' % rank))
    return env
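
Every example in this listing funnels observations through FrameStack. For orientation, here is a minimal sketch of what that wrapper does, modeled on baselines.common.atari_wrappers.FrameStack; the real implementation returns a LazyFrames object to avoid duplicating memory in the replay buffer, whereas this simplified version (the class name SimpleFrameStack is ours) concatenates into a plain ndarray:

from collections import deque

import gym
import numpy as np
from gym import spaces

class SimpleFrameStack(gym.Wrapper):
    def __init__(self, env, k):
        gym.Wrapper.__init__(self, env)
        self.k = k
        self.frames = deque([], maxlen=k)
        shp = env.observation_space.shape
        self.observation_space = spaces.Box(
            low=0, high=255, shape=shp[:-1] + (shp[-1] * k,),
            dtype=env.observation_space.dtype)

    def reset(self):
        # Fill the buffer with k copies of the first frame
        ob = self.env.reset()
        for _ in range(self.k):
            self.frames.append(ob)
        return self._get_ob()

    def step(self, action):
        ob, reward, done, info = self.env.step(action)
        self.frames.append(ob)
        return self._get_ob(), reward, done, info

    def _get_ob(self):
        # Stack along the channel axis: k frames of shape (84, 84, 1) become (84, 84, k)
        return np.concatenate(list(self.frames), axis=-1)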
Example #2
class Evaluator(object):
    def __init__(self, env_name, num_episodes, exp_name, policy):
        self.exp_name = exp_name
        self.env = gym.make(env_name)
        self.env = ProcessFrame84(self.env, crop=False)
        self.env = FrameStack(self.env, 4)
        self.num_episodes = 1  # NOTE: hardcoded; the num_episodes argument is unused
        self.ep_len = 4500
        self.policy = policy
        if not os.path.exists('images'):
            os.mkdir('images')
        self.image_folder = os.path.join(
            os.path.abspath(os.path.dirname(__file__)), 'images')

    def format_obs(self, obs_name, obs):
        nums = ",".join(map(str, obs))
        dict_format = "{" + nums + "}"
        final_str = "observation \"{}\" - {}\n".format(obs_name, dict_format)
        return final_str

    def eval_model(self, ep_num):
        for i in range(self.num_episodes):
            trajectory_file = "{}_ep{}_itr{}_trajectory.txt".format(
                self.exp_name, ep_num, i)
            if not os.path.exists("trajectories"):
                os.makedirs("trajectories")
            trajectory_path = os.path.join("trajectories", trajectory_file)
            ep_images = []
            ob = self.env.reset()
            ob = np.array(ob)
            eprews = []
            if i == 0:
                ep_images.append(self.env.unwrapped._last_observation)
            for step in range(self.ep_len):
                action, vpred, nlp = self.policy.get_ac_value_nlp_eval(ob)
                ob, rew, done, info = self.env.step(action[0])
                if i == 0:
                    ep_images.append(self.env.unwrapped._last_observation)
                if rew is None:
                    eprews.append(0)
                else:
                    eprews.append(rew)
                if step > 0:
                    pos_trans, pos_rot, vel_trans, vel_rot = self.env.unwrapped.get_pos_and_vel()

                    with open(trajectory_path, 'a') as f:
                        f.write(self.format_obs("DEBUG.POS.TRANS", pos_trans))
                        f.write(self.format_obs("DEBUG.POS.ROT", pos_rot))
                        f.write(self.format_obs("VEL.TRANS", vel_trans))
                        f.write(self.format_obs("VEL.ROT", vel_rot))

            for j in range(len(ep_images)):
                image_file = os.path.join(
                    self.image_folder,
                    self.exp_name + "_{}_{}_{}_".format(ep_num, i, j) + ".png")
                cv2.imwrite(image_file, ep_images[j])
            print("Episode {} cumulative reward: {}".format(i, sum(eprews)))
Example #3
def train():
    # Fetch the requested environment set in flags.
    env_class = attrgetter(FLAGS.env)(sc2g.env)

    env = env_class.make_env(
        map_name=FLAGS.map_name,
        feature_screen_size=FLAGS.screen_size,
        feature_minimap_size=FLAGS.minimap_size,
        visualize=FLAGS.visualize,
        save_replay_episodes=FLAGS.save_replay_episodes,
        replay_dir=FLAGS.replay_dir,
    )

    # Stack the last n frames (baselines' FrameStack keeps them as lazy frames to save memory)
    if FLAGS.num_stack_frames > 0:
        print("Stack frames enabled: n=%d" % FLAGS.num_stack_frames)
        env = FrameStack(env, FLAGS.num_stack_frames)

    model = deepq.models.cnn_to_mlp(convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
                                    hiddens=[256],
                                    dueling=True)

    act = deepq.learn(
        env,
        q_func=model,
        lr=FLAGS.learning_rate,  # Learning rate for the Adam optimizer
        max_timesteps=FLAGS.max_timesteps,  # Max timesteps
        buffer_size=FLAGS.buffer_size,  # Size of the replay buffer
        exploration_fraction=FLAGS.exploration_fraction,  # Fraction of max_timesteps over which exploration is annealed
        exploration_final_eps=FLAGS.exploration_final_eps,  # Final value of random action probability
        train_freq=FLAGS.train_freq,  # How often the model is updated, in steps
        print_freq=FLAGS.print_freq,  # How often training progress is printed, in episodes
        checkpoint_freq=FLAGS.checkpoint_freq,  # How often to save the model, in steps
        learning_starts=FLAGS.learning_starts,  # How many steps before learning starts
        gamma=FLAGS.gamma,  # Discount factor
        target_network_update_freq=FLAGS.target_network_update_freq,  # How often the target network is updated
        prioritized_replay=FLAGS.prioritized_replay,
        callback=deepq_callback,
    )

    print("Saving model...")
    save_model(act)

    print("Saving replay...")
    env.unwrapped.sc2_env.save_replay(FLAGS.map_name)

    print("Closing environment...")
    env.close()
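
Example #3 pulls every hyperparameter from FLAGS, but the flag definitions are not part of the snippet. A hedged sketch of what they might look like, assuming absl-py; all names are inferred from the FLAGS.* references above and the defaults are placeholders:

from absl import flags

FLAGS = flags.FLAGS
flags.DEFINE_string("map_name", "MoveToBeacon", "SC2 map to run")
flags.DEFINE_integer("screen_size", 84, "Feature-screen resolution")
flags.DEFINE_integer("minimap_size", 64, "Feature-minimap resolution")
flags.DEFINE_boolean("visualize", False, "Render the pysc2 viewer")
flags.DEFINE_integer("num_stack_frames", 4, "Frames to stack; 0 disables stacking")
flags.DEFINE_float("learning_rate", 5e-4, "Learning rate for the Adam optimizer")
flags.DEFINE_float("gamma", 0.99, "Discount factor")
# ... plus the replay/training-schedule flags consumed by deepq.learn above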
Example #4
def __init__(self, env_name, num_episodes, exp_name, policy):
    self.exp_name = exp_name
    self.env = gym.make(env_name)
    self.env = ProcessFrame84(self.env, crop=False)
    self.env = FrameStack(self.env, 4)
    self.num_episodes = 1  # NOTE: hardcoded; the num_episodes argument is unused
    self.policy = policy
    if not os.path.exists('images'):
        os.mkdir('images')
    self.image_folder = os.path.join(
        os.path.abspath(os.path.dirname(__file__)), 'images')
    print('Image folder', self.image_folder)
Example #5
def main():
    # Create the game environment.
    # To use make_atari from baselines, the env id must contain "NoFrameskip".
    env = make_atari("BreakoutNoFrameskip-v0")
    # Convert to grayscale and resize to 84x84
    env = WarpFrame(env)
    # Stack the last 4 frames to create a history
    env = FrameStack(env, k=4)
    # Initialize the model: image input, so use a CNN
    # convs = [(n_filters, kernel_size, stride), ...]
    model = deepq.models.cnn_to_mlp(convs=[(32, 3, 1), (32, 3, 1)],
                                    hiddens=[256])
    # train the model
    act = deepq.learn(
        env,
        q_func=model,
        lr=1e-2,
        # number of timesteps to optimize for
        max_timesteps=10000,
        buffer_size=1000,
        # fraction of entire training period over which the exploration rate is annealed
        exploration_fraction=0.1,
        # final value of random action probability
        exploration_final_eps=0.01,
        print_freq=10)
    print("Saving model to breakout_model.pkl")
    act.save("breakout_model.pkl")
Example #6
def make_env_all_params(rank, add_monitor, args, logdir):
    if args["env_kind"] == 'atari':
        env = gym.make(args['env'])
        assert 'NoFrameskip' in env.spec.id
        env = NoopResetEnv(env, noop_max=args['noop_max'])
        env = MaxAndSkipEnv(env, skip=4)
        env = ProcessFrame84(env, crop=False)
        env = FrameStack(env, 4)
        env = ExtraTimeLimit(env, args['max_episode_steps'])
        if 'Montezuma' in args['env']:
            env = MontezumaInfoWrapper(env)
        env = AddRandomStateToInfo(env)
    elif args["env_kind"] == 'mario':
        env = make_mario_env()
    elif args["env_kind"] == "retro_multi":
        env = make_multi_pong()
    elif args["env_kind"] == 'robopong':
        if args["env"] == "pong":
            env = make_robo_pong()
        elif args["env"] == "hockey":
            env = make_robo_hockey()
    elif args["env_kind"] == "dm_suite":
        env = make_dm_suite(task=args["env"],
                            logdir=logdir,
                            to_record=args["to_record"])

    if add_monitor:
        env = TempMonitor(env)

    return env
Example #7
def make_env(env_idx):
    """
    Create an environment with some standard wrappers.
    """


    # Make the environment


    levelList = ['SuperMarioBros-1-1-v0','SuperMarioBros-1-2-v0','SuperMarioBros-1-3-v0','SuperMarioBros-1-4-v0','SuperMarioBros-2-1-v0','SuperMarioBros-2-2-v0','SuperMarioBros-2-3-v0','SuperMarioBros-2-4-v0']


    # record_path = "./records/" + dicts[env_idx]['state']
    env = gym_super_mario_bros.make(levelList[env_idx])
    

    env = JoypadSpace(env, SIMPLE_MOVEMENT)

    #env = RewardScaler(env)

    # PreprocessFrame
    env = PreprocessFrame(env)

    # Stack 4 frames
    env = FrameStack(env, 4)

    # Allow backtracking so that agents are not discouraged too heavily
    # from exploring backwards when there is no way to advance
    # head-on in the level.
    env = AllowBacktracking(env)

    return env
Example #8
def make_test():
    """
    Create an environment with some standard wrappers.
    """

    # Make the environment
    env = gym_super_mario_bros.make('SuperMarioBros-1-4-v0')
    
    env = JoypadSpace(env, SIMPLE_MOVEMENT)

    # Scale the rewards
    #env = RewardScaler(env)

    # PreprocessFrame
    env = PreprocessFrame(env)

    # Stack 4 frames
    env = FrameStack(env, 4) # This can be changed. 

    # Allow backtracking so that agents are not discouraged too heavily
    # from exploring backwards when there is no way to advance
    # head-on in the level.
    env = AllowBacktracking(env)

    return env
Example #9
def make_mario_env(crop=True, frame_stack=True, clip_rewards=False):
    assert clip_rewards is False
    import gym
    import retro
    import os
    from baselines.common.atari_wrappers import FrameStack

    # gym.undo_logger_setup()
    if MPI.COMM_WORLD.Get_rank() == 0:
        if not os.path.isdir('./mario_bk2'):
            os.mkdir('./mario_bk2')

        env = retro.make('SuperMarioBros-Nes',
                         'Level1-1',
                         record='./mario_bk2')
    else:
        env = retro.make('SuperMarioBros-Nes', 'Level1-1')

    buttons = env.buttons
    env = MarioXReward(env)
    env = FrameSkip(env, 4)
    env = ProcessFrame84(env, crop=crop)
    if frame_stack:
        env = FrameStack(env, 4)
    env = LimitedDiscreteActions(env, buttons)
    return env
Example #10
def make_env(game=None,
             state=None,
             stack=False,
             scale_rew=True,
             allowbacktrace=False,
             custom=True):
    """
    Create an environment with some standard wrappers.
    """
    #env = grc.RemoteEnv('tmp/sock')
    #env = retro.make(game='SonicTheHedgehog-Genesis', state='GreenHillZone.Act1')
    #env = retro.make(game='StreetsOfRage2-Genesis', state='1Player.Axel.Level1')
    #env = retro.make(game='StreetFighterIISpecialChampionEdition-Genesis', state='Champion.Level1.RyuVsGuile')
    #env = retro.make(game='SuperMarioWorld-Snes', state='Bridges1')
    env = retro.make(game=game, state=state)
    #SuperMarioWorld-Snes ['Bridges1',
    env.seed(0)
    env = SonicDiscretizerV3(env)
    #env = StreetOfRage2Discretizer(env)
    #env = StreeFighter2Discretizer(env)
    if scale_rew:
        env = RewardScaler(env)
    env = WarpFrameRGB(env)
    if custom:
        env = CustomGym(env)
    if allowbacktrace:
        env = AllowBacktracking(env)
    if stack:
        env = FrameStack(env, 4)
    env = Controller_Gym(env)
    return env
Example #11
def make_env(stack=True, scale_rew=True, local=False, level_choice=None):
    """
    Create an environment with some standard wrappers.
    """
    print(stack, scale_rew, local)
    if local:  # Select Random Level if local
        from retro_contest.local import make
        levels = [
            'SpringYardZone.Act3', 'SpringYardZone.Act2', 'GreenHillZone.Act3',
            'GreenHillZone.Act1', 'StarLightZone.Act2', 'StarLightZone.Act1',
            'MarbleZone.Act2', 'MarbleZone.Act1', 'MarbleZone.Act3',
            'ScrapBrainZone.Act2', 'LabyrinthZone.Act2', 'LabyrinthZone.Act1',
            'LabyrinthZone.Act3'
        ]
        if not level_choice:
            level_choice = random.choice(levels)
        else:
            level_choice = levels[level_choice]
        env = make(game='SonicTheHedgehog-Genesis', state=level_choice)
    else:
        print('connecting to remote environment')
        env = grc.RemoteEnv('tmp/sock')
        print('starting episode')
    if scale_rew:
        env = RewardScaler(env)
    env = WarpFrame(env)
    if stack:
        env = FrameStack(env, 4)
    return env
Example #12
def wrap_env_dqn(env):
    env = ThresholdResizeFrame(env)
    env = ClipRewardEnv(env)
    env = MaxAndSkipEnv(env, skip=4)
    env = Monitor(env, logger.get_dir(), allow_early_resets=True)
    env = FrameStack(env, 4)
    return env
Example #13
def make_test():
    """
    Create an environment with some standard wrappers.
    """

    # Make the environment
    env = gym_super_mario_bros.make('SuperMarioBros-v0')
    env = BinarySpaceToDiscreteSpaceEnv(env, RIGHT_ONLY)
    print(env.action_space)
    # Build the actions array
    # env = ActionsDiscretizer(env)

    # Scale the rewards
    # env = RewardScaler(env)

    # PreprocessFrame
    env = PreprocessFrame(env)

    # Stack 6 frames
    env = FrameStack(env, 6)

    # Allow backtracking so that agents are not discouraged too heavily
    # from exploring backwards when there is no way to advance
    # head-on in the level.
    # env = AllowBacktracking(env)

    return env
Example #14
def wrap_environment(environment, n_frames=4):
    environment = ActionsDiscretizer(environment)
    environment = RewardScaler(environment)
    environment = PreprocessFrame(environment)
    environment = FrameStack(environment, n_frames)
    environment = AllowBacktracking(environment)
    return environment
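
Example #14 is the most compact form of this wrapper pipeline. A hypothetical call site for it (the env id is borrowed from examples #7 and #8; the exact observation shape depends on what PreprocessFrame emits):

import numpy as np
import gym_super_mario_bros

env = wrap_environment(gym_super_mario_bros.make('SuperMarioBros-1-1-v0'))
ob = env.reset()
print(np.array(ob).shape)  # last axis is 4x the per-frame channels after FrameStack
ob, reward, done, info = env.step(env.action_space.sample())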
Example #15
def make_env_all_params(rank, add_monitor, args):
    if args["env_kind"] == "atari":
        env = gym.make(args["env"])
        assert "NoFrameskip" in env.spec.id
        # from self-supervised exploration via disagreement
        if args["stickyAtari"] == "true":
            env = StickyActionEnv(env)
        env._max_episode_steps = args["max_episode_steps"] * 4
        env = MaxAndSkipEnv(env, skip=4)
        env = ProcessFrame84(env, crop=False)
        env = FrameStack(env, 4)
        env = ExtraTimeLimit(env, args["max_episode_steps"])
        if "Montezuma" in args["env"]:
            env = MontezumaInfoWrapper(env)
        env = AddRandomStateToInfo(env)
        if args["noisy_tv"] == "true":
            env = NoisyTVEnvWrapper(env)
        # assert env.action_space == spaces.Discrete(7)
    elif args["env_kind"] == "mario":
        env = make_mario_env()
        if args["noisy_tv"] == "true":
            env = NoisyTVEnvWrapperMario(env)
    elif args["env_kind"] == "retro_multi":
        env = make_multi_pong()
    elif args["env_kind"] == "robopong":
        if args["env"] == "pong":
            env = make_robo_pong()
        elif args["env"] == "hockey":
            env = make_robo_hockey()

    if add_monitor:
        env = Monitor(env, osp.join(logger.get_dir(), "%.2i" % rank))
    return env
Example #16
def wrap_modified_rr(env, episode_life=True, episode_reward=False, episode_frame=False,
                     norm_rewards=True,
                     frame_stack=False, scale=False):
    """Configure environment for DeepMind-style Atari modified as described in RUDDER paper;
    """
    if episode_life:
        print("Episode Life")
        env = EpisodicLifeEnv(env)
    if episode_reward:
        print("Episode Reward")
        env = EpisodicRewardEnv(env)
    if episode_frame:
        print("Episode Frame")
        env = EpisodicFrameEnv(env)
    _ori_r_games = ['DoubleDunk', 'Boxing', 'Freeway', 'Pong',
                    'Bowling', 'Skiing', 'IceHockey', 'Enduro']
    original_reward = any([game in env.spec.id for game in _ori_r_games])

    if 'FIRE' in env.unwrapped.get_action_meanings():
        env = FireResetEnv(env)
    env = WarpFrame(env)
    if scale:
        env = ScaledFloatFrame(env)
    if norm_rewards and not original_reward:
        print("Normalizing reward....")
        env = NormRewardEnv(env, 100.)
    else:
        print("Normal reward")
    if frame_stack:
        env = FrameStack(env, 4)
    return env
Example #17
def make_env_all_params(rank, add_monitor, args):
    if args["env_kind"] == 'atari':
        env = gym.make(args['env'])
        assert 'NoFrameskip' in env.spec.id
        env = NoopResetEnv(env, noop_max=args['noop_max'])
        env = MaxAndSkipEnv(env, skip=4)
        env = ProcessFrame84(env, crop=False)
        env = FrameStack(env, 4)
        env = ExtraTimeLimit(env, args['max_episode_steps'])
        if 'Montezuma' in args['env']:
            env = MontezumaInfoWrapper(env)
        env = AddRandomStateToInfo(env)
    elif args["env_kind"] == 'mario':
        env = make_mario_env()
    elif args["env_kind"] == "retro_multi":
        env = make_multi_pong()
    elif args["env_kind"] == 'robopong':
        if args["env"] == "pong":
            env = make_robo_pong()
        elif args["env"] == "hockey":
            env = make_robo_hockey()

    if add_monitor:
        env = Monitor(env, osp.join(logger.get_dir(), '%.2i' % rank))
    return env
Example #18
def wrap_deepmind_n64(env, reward_scale=1 / 100.0, frame_stack=1, grayscale=False):
    env = MaxAndSkipEnv(env, skip=4)
    env = WarpFrame(env, width=150, height=100, grayscale=grayscale)
    env = FrameStack(env, frame_stack)
    env = ScaledFloatFrame(env)
    env = RewardScaler(env, scale=reward_scale)  # was hardcoded to 1/100, ignoring reward_scale
    return env
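
RewardScaler is not one of baselines' atari_wrappers; each of these repos defines its own. A minimal sketch of the usual shape, assumed for reference rather than taken verbatim from any of the repos above:

import gym

class RewardScaler(gym.RewardWrapper):
    def __init__(self, env, scale=0.01):
        super(RewardScaler, self).__init__(env)
        self.scale = scale

    def reward(self, reward):
        # Rescale rewards into the range the agent's hyperparameters expect
        return reward * self.scale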
Example #19
def wrap_deepmind(env,
                  downsample=True,
                  episode_life=True,
                  clip_rewards=True,
                  frame_stack=False,
                  scale=False,
                  color=False):
    """Configure environment for DeepMind-style Atari.
    """
    if ("videopinball" in str(env.spec.id).lower()) or ('tennis' in str(
            env.spec.id).lower()):
        env = WarpFrame(env, width=160, height=210, grayscale=False)
    if episode_life:
        env = EpisodicLifeEnv(env)
    if 'FIRE' in env.unwrapped.get_action_meanings():
        env = FireResetEnv(env)
    if downsample:
        env = WarpFrame(env, grayscale=False)
    if not color:
        env = GrayscaleWrapper(env)
    if scale:
        env = ScaledFloatFrame(env)
    if clip_rewards:
        env = ClipRewardEnv(env)
    if frame_stack:
        env = FrameStack(env, 4)
    return env
Example #20
def make_env(env_idx):
    dicts = [
        {'game': 'SonicTheHedgehog-Genesis', 'state': 'SpringYardZone.Act3'},
        {'game': 'SonicTheHedgehog-Genesis', 'state': 'SpringYardZone.Act2'},
        {'game': 'SonicTheHedgehog-Genesis', 'state': 'GreenHillZone.Act3'},
        {'game': 'SonicTheHedgehog-Genesis', 'state': 'GreenHillZone.Act1'},
        {'game': 'SonicTheHedgehog-Genesis', 'state': 'StarLightZone.Act2'},
        {'game': 'SonicTheHedgehog-Genesis', 'state': 'StarLightZone.Act1'},
        {'game': 'SonicTheHedgehog-Genesis', 'state': 'MarbleZone.Act2'},
        {'game': 'SonicTheHedgehog-Genesis', 'state': 'MarbleZone.Act1'},
        {'game': 'SonicTheHedgehog-Genesis', 'state': 'MarbleZone.Act3'},
        {'game': 'SonicTheHedgehog-Genesis', 'state': 'ScrapBrainZone.Act2'},
        {'game': 'SonicTheHedgehog-Genesis', 'state': 'LabyrinthZone.Act2'},
        {'game': 'SonicTheHedgehog-Genesis', 'state': 'LabyrinthZone.Act1'},
        {'game': 'SonicTheHedgehog-Genesis', 'state': 'LabyrinthZone.Act3'}
        ]

    print(dicts[env_idx]['game'], dicts[env_idx]['state'], flush=True)

    env = make(game=dicts[env_idx]['game'],
               state=dicts[env_idx]['state'], bk2dir="./records")

    env = ActionsDiscretizer(env)

    env = RewardScaler(env)

    env = PreprocessFrame(env)

    env = FrameStack(env, 4)

    env = AllowBackTracking(env)

    return env
Example #21
def make_unity_maze(env_id, seed=0, rank=0, expID=0, frame_stack=True,
                    logdir=None, ext_coeff=1.0, recordUnityVid=False, **kwargs):
    import os
    import sys
    import time
    try:
        sys.path.insert(0, os.path.abspath("ml-agents/python/"))
        from unityagents import UnityEnvironment
        from unity_wrapper import GymWrapper
    except ImportError:
        print("Import error in unity environment. Ignore if not using unity.")
    from baselines.common.atari_wrappers import FrameStack
    # gym.undo_logger_setup()  # deprecated in new version of gym

    # max 20 workers per expID, max 30 experiments per machine
    if rank >= 0 and rank <= 200:
        time.sleep(rank * 2)
    env = UnityEnvironment(file_name='envs/' + env_id,
                           worker_id=(expID % 60) * 200 + rank)
    maxsteps = 3000 if 'big' in env_id else 500
    env = GymWrapper(env, seed=seed, rank=rank, expID=expID, maxsteps=maxsteps,
                     **kwargs)
    if "big" in env_id:
        env = UnityRoomCounterWrapper(env, use_ext_reward=(ext_coeff != 0.0))
    if rank == 1 and recordUnityVid:
        env = RecordBestScores(env, directory=logdir, freq=1)
    print('Loaded environment %s with rank %d\n\n' % (env_id, rank))

    # env = NoReward(env)
    # env = FrameSkip(env, 4)
    env = ProcessFrame84(env, crop=False)
    if frame_stack:
        env = FrameStack(env, 4)
    return env
Example #22
def make_test():
    """
    Create an environment with some standard wrappers.
    """

    # Make the environment
    env = make_retro(game='SonicTheHedgehog-Genesis',
                     state='GreenHillZone.Act2',
                     record="./records")

    # Build the actions array
    env = ActionsDiscretizer(env)

    # Scale the rewards
    env = RewardScaler(env)

    # PreprocessFrame
    env = PreprocessFrame(env)

    # Stack 4 frames
    env = FrameStack(env, 4)

    # Allow backtracking so that agents are not discouraged too heavily
    # from exploring backwards when there is no way to advance
    # head-on in the level.
    env = AllowBacktracking(env)

    return env
Example #23
def make_env_all_params(rank, add_monitor, args):
    if args["env_kind"] == 'atari':
        env = gym.make(args['env'])
        assert 'NoFrameskip' in env.spec.id
        if args["stickyAtari"]:  # 在智能体执行动作时增加随机性
            env._max_episode_steps = args['max_episode_steps'] * 4
            env = StickyActionEnv(env)
        else:
            env = NoopResetEnv(env, noop_max=args['noop_max'])
        env = MaxAndSkipEnv(env, skip=4)  # 每个动作连续执行4步
        env = ProcessFrame84(env, crop=False)  # 处理观测
        env = FrameStack(env, 4)  # 将连续4帧叠加起来作为输入
        env = ExtraTimeLimit(env, args['max_episode_steps'])
        if not args["stickyAtari"]:
            env = ExtraTimeLimit(env,
                                 args['max_episode_steps'])  # 限制了一个周期的最大时间步
        if 'Montezuma' in args['env']:  # 记录智能体的位置, 所在的房间, 已经访问的房间
            env = MontezumaInfoWrapper(env)
        env = AddRandomStateToInfo(env)
    elif args["env_kind"] == 'mario':  # 超级马里奥
        env = make_mario_env()
    elif args["env_kind"] == "retro_multi":  # 多智能体游戏, Multi-Pong
        env = make_multi_pong()
    elif args["env_kind"] == 'robopong':
        if args["env"] == "pong":
            env = make_robo_pong()
        elif args["env"] == "hockey":
            env = make_robo_hockey()

    if add_monitor:
        env = Monitor(env, osp.join(logger.get_dir(), '%.2i' % rank))
    return env
Example #24
def make_val(env_idx, stack=True, scale_rew=True):
    """
    Create an environment with some standard wrappers.
    """
    dicts = [
        {'game': 'SonicTheHedgehog-Genesis', 'state': 'SpringYardZone.Act1'},
        {'game': 'SonicTheHedgehog-Genesis', 'state': 'GreenHillZone.Act2'},
        {'game': 'SonicTheHedgehog-Genesis', 'state': 'StarLightZone.Act3'},
        {'game': 'SonicTheHedgehog-Genesis', 'state': 'ScrapBrainZone.Act1'},
        {'game': 'SonicTheHedgehog2-Genesis', 'state': 'MetropolisZone.Act3'},
        {'game': 'SonicTheHedgehog2-Genesis', 'state': 'HillTopZone.Act2'},
        {'game': 'SonicTheHedgehog2-Genesis', 'state': 'CasinoNightZone.Act2'},
        {'game': 'SonicAndKnuckles3-Genesis', 'state': 'LavaReefZone.Act1'},
        {'game': 'SonicAndKnuckles3-Genesis', 'state': 'FlyingBatteryZone.Act2'},
        {'game': 'SonicAndKnuckles3-Genesis', 'state': 'HydrocityZone.Act1'},
        {'game': 'SonicAndKnuckles3-Genesis', 'state': 'AngelIslandZone.Act2'}
    ]
    print(dicts[env_idx]['game'], dicts[env_idx]['state'], flush=True)
    env = make(game=dicts[env_idx]['game'], state=dicts[env_idx]['state'])  # , bk2dir='/tmp', record='/tmp'
    env = SonicDiscretizer(env)
    if scale_rew:
        env = RewardScaler(env)
    env = WarpFrame96(env)
    if stack:
        env = FrameStack(env, 4)
    env = AllowBacktracking(env)
    return env
Example #25
def make_test():
    """
    Create an environment with some standard wrappers.
    """

    envIdx = 0
    # NOTE: assumes a module-level `dicts` list like the one in make_val above.
    # Pass bk2dir/record to make() if you want to output a video of the run.
    env = make(game=dicts[envIdx]['game'], state=dicts[envIdx]['state'])

    # Build the actions array,
    env = ActionsDiscretizer(env, dicts[envIdx]['game'])

    # Scale the rewards
    #env = RewardScaler(env)

    # PreprocessFrame
    env = PreprocessFrame(env)

    # Stack 4 frames
    env = FrameStack(env, 4)

    # Allow backtracking so that agents are not discouraged too heavily
    # from exploring backwards when there is no way to advance
    # head-on in the level.
    env = AllowBacktracking(env)

    return env
Example #26
def make_env(env_idx):
    """
    Create an environment with some standard wrappers.
    """

    # Make the environment
    # NOTE: assumes a module-level `dicts` list like the one in make_val above.
    print(dicts[env_idx]['game'], dicts[env_idx]['state'], flush=True)
    #record_path = "./records/" + dicts[env_idx]['state']
    env = make(
        game=dicts[env_idx]['game'],
        state=dicts[env_idx]['state'])  #, bk2dir="./records")#record='/tmp')

    # Build the actions array,
    env = ActionsDiscretizer(env, dicts[env_idx]['game'])

    # Scale the rewards
    #env = RewardScaler(env)

    # PreprocessFrame
    env = PreprocessFrame(env)

    # Stack 4 frames
    env = FrameStack(env, 4)
    #env = SkipEnv(env, 2)

    # Allow backtracking so that agents are not discouraged too heavily
    # from exploring backwards when there is no way to advance
    # head-on in the level.
    env = AllowBacktracking(env)

    return env
Example #27
def make_env(stack=True, scale_rew=True, game=None, state=None, seed=0, render=False):
    """
    Create an environment with some standard wrappers.
    """
    # if not is_remote:
    #     if game is None or state is None:
    #         import data_set_reader
    #         train_set = data_set_reader.read_train_set()
    #         game, state = random.choice(train_set)
    #     print("it's local env: ", game, state)
    #     from retro_contest.local import make
    #     env = make(game=game, state=state)
    # else:
    #     print("it's remote env")
    #     import gym_remote.client as grc
    #     env = grc.RemoteEnv('tmp/sock')
    env = make(game=game, state=state)
    env.seed(seed)
    env = AllowBacktracking(env)
    env = Monitor(env, logger.get_dir() and os.path.join(logger.get_dir(), str(seed)), allow_early_resets=True)
    env = SonicDiscretizer(env, render)
    if scale_rew:
        env = RewardScaler(env)
    env = WarpFrame(env)
    if stack:
        env = FrameStack(env, 4)
    return env
Example #28
class AllowBacktracking(gym.Wrapper):
    def __init__(self, env):
        super(AllowBacktracking, self).__init__(env)
        self._cur_x = 0
        self._max_x = 0

    def reset(self, **kwargs):  # pylint: disable=E0202
        self._cur_x = 0
        self._max_x = 0
        return self.env.reset(**kwargs)

    def step(self, action):  # pylint: disable=E0202
        # The raw reward tracks horizontal progress; only reward progress past the
        # furthest point reached so far, so moving backwards costs nothing.
        obs, rew, done, info = self.env.step(action)
        self._cur_x += rew
        rew = max(0, self._cur_x - self._max_x)
        self._max_x = max(self._max_x, self._cur_x)
        return obs, rew, done, info

def make_env(env_idx):
    dicts = [
        {'game': 'SonicTheHedgehog-Genesis', 'state': 'SpringYardZone.Act3'},
        {'game': 'SonicTheHedgehog-Genesis', 'state': 'SpringYardZone.Act2'},
        {'game': 'SonicTheHedgehog-Genesis', 'state': 'GreenHillZone.Act3'},
        {'game': 'SonicTheHedgehog-Genesis', 'state': 'GreenHillZone.Act1'},
        {'game': 'SonicTheHedgehog-Genesis', 'state': 'StarLightZone.Act2'},
        {'game': 'SonicTheHedgehog-Genesis', 'state': 'StarLightZone.Act1'},
        {'game': 'SonicTheHedgehog-Genesis', 'state': 'MarbleZone.Act2'},
        {'game': 'SonicTheHedgehog-Genesis', 'state': 'MarbleZone.Act1'},
        {'game': 'SonicTheHedgehog-Genesis', 'state': 'MarbleZone.Act3'},
        {'game': 'SonicTheHedgehog-Genesis', 'state': 'ScrapBrainZone.Act2'},
        {'game': 'SonicTheHedgehog-Genesis', 'state': 'LabyrinthZone.Act2'},
        {'game': 'SonicTheHedgehog-Genesis', 'state': 'LabyrinthZone.Act1'},
        {'game': 'SonicTheHedgehog-Genesis', 'state': 'LabyrinthZone.Act3'}
    ]

    # Make the environment
    print(dicts[env_idx]['game'], dicts[env_idx]['state'], flush=True)
    #record_path = "./records/" + dicts[env_idx]['state']
    env = make(game=dicts[env_idx]['game'], state=dicts[env_idx]['state'], bk2dir="./records")  # record='/tmp'

    # Build the actions array, 
    env = ActionsDiscretizer(env)

    # Scale the rewards
    env = RewardScaler(env)

    # PreprocessFrame
    env = PreprocessFrame(env)

    # Stack 4 frames
    env = FrameStack(env, 4)

    # Allow backtracking so that agents are not discouraged too heavily
    # from exploring backwards when there is no way to advance
    # head-on in the level.
    env = AllowBacktracking(env)

    return env
Example #29
def _make_dqn(unity_env, train_mode, reward_range=(-np.inf, np.inf)):
    env = MLToGymEnv(unity_env,
                     train_mode=train_mode,
                     reward_range=reward_range)
    env = FloatToUInt8Frame(env)
    env = WarpFrame(env)  # Makes sure we have 84 x 84 b&w
    env = FrameStack(env, 4)  # Stack last 4 frames
    return env
Example #30
def __init__(self, level=2, frame_size=64, mode=CPU, *args, **kwargs):
    self.env = Cls(level=level, *args, **kwargs)
    self.env = GRewardScaler(self.env, scale=1)
    if mode == CPU:
        self.env = GPreprocessFrame(self.env, size=frame_size)
        self.env = FrameStack(self.env, 4)
    else:
        self.env = SetPlayingMode(target_mode=HUMAN)(self.env)