Пример #1
0
def start_game(game, state, directory='tmp', steps=1000000,
               discrete_actions=False, bk2dir=None):
    """Build a retro environment and serve it through ``RemoteEnvWrapper``.

    The emulator is first requested with ``scenario='contest'``; if that
    call raises any ``Exception`` it is requested again without a scenario
    argument.

    Args:
        game: Game identifier forwarded to ``retro.make``.
        state: Initial state forwarded to ``retro.make``.
        directory: Second argument handed to ``RemoteEnvWrapper``.
        steps: Argument handed to ``serve`` on the wrapped environment.
        discrete_actions: Select ``retro.ACTIONS_DISCRETE`` instead of
            ``retro.ACTIONS_FILTERED``.
        bk2dir: When truthy, passed to ``env.auto_record``.

    Returns:
        None. The function ends with the ``serve`` call.
    """
    action_mode = (retro.ACTIONS_DISCRETE
                   if discrete_actions else retro.ACTIONS_FILTERED)
    make_kwargs = {'use_restricted_actions': action_mode}

    try:
        emulator = retro.make(game, state, scenario='contest', **make_kwargs)
    except Exception:
        # Best-effort fallback: retry with retro's default scenario.
        emulator = retro.make(game, state, **make_kwargs)

    if bk2dir:
        emulator.auto_record(bk2dir)

    frame_skipped = retro_contest.StochasticFrameSkip(
        emulator, n=4, stickprob=0.25)
    time_limited = gym.wrappers.TimeLimit(
        frame_skipped, max_episode_steps=4500)

    RemoteEnvWrapper(time_limited, directory).serve(steps)
Пример #2
0
def make_rand_env(game_states,
                  stack=2,
                  scale_rew=True,
                  color=False,
                  exp_type=('x',),
                  exp_const=(0.002,),
                  max_episode_steps=4500,
                  maml=False,
                  small_size=False):
    """
    Create an environment with some standard wrappers.

    The base environment is built from the first ``(game, state)`` pair of
    ``game_states`` and then handed, together with the whole collection, to
    ``RandomEnvironmen2`` (when ``maml`` is true) or ``RandomEnvironmen``.

    Args:
        game_states: Indexable collection of ``(game, state)`` pairs. Entry
            0 creates the initial environment; the full collection goes to
            the random-environment wrapper.
        stack: Frame count for ``FrameStack``; applied only when ``> 1``.
        scale_rew: Wrap with ``RewardScaler`` when true.
        color: Forwarded to ``WarpFrame``.
        exp_type: Sequence of exploration reward kinds (``'obs'`` or
            ``'x'``), paired positionally with ``exp_const``.
        exp_const: Sequence of exploration coefficients. Pairs whose
            coefficient is not ``> 0`` are skipped entirely, so their kind
            is not validated.
        max_episode_steps: Limit given to ``gym.wrappers.TimeLimit``.
        maml: Selects ``RandomEnvironmen2`` and exposes its ``sample``
            attribute as ``env.sample`` on the returned environment.
        small_size: Forwarded to ``WarpFrame``.

    Returns:
        The fully wrapped environment.

    Raises:
        AssertionError: If ``exp_type`` and ``exp_const`` differ in length.
        ValueError: If a kind with a positive coefficient is not ``'obs'``
            or ``'x'``.
    """
    game, state = game_states[0]
    env = make(game, state)

    if maml:
        env_rand = RandomEnvironmen2(env, game_states)
    else:
        env_rand = RandomEnvironmen(env, game_states)

    env = retro_contest.StochasticFrameSkip(env_rand, n=4, stickprob=0.25)

    env = BackupOriginalData(env)
    env = gym.wrappers.TimeLimit(env, max_episode_steps=max_episode_steps)

    env = SonicDiscretizer(env)
    env = AllowBacktracking(env)

    if scale_rew:
        env = RewardScaler(env)

    env = WarpFrame(env, color, small_size)

    # Without this check zip() would silently drop unpaired entries.
    assert len(exp_type) == len(exp_const), \
        'exp_type and exp_const must have the same length'
    for kind, coeff in zip(exp_type, exp_const):
        if coeff > 0:
            if kind == 'obs':
                env = ObsExplorationReward(env, coeff, game_specific=True)
            elif kind == 'x':
                env = XExplorationReward(env, coeff, game_specific=True)
            else:
                raise ValueError('unknown exploration {}'.format(kind))

    if stack > 1:
        env = FrameStack(env, stack)

    env = EpisodeInfo(env)

    if maml:
        # Expose the random-environment wrapper's sampler on the outermost
        # wrapper so callers do not have to unwrap to reach it.
        env.sample = env_rand.sample

    return env
Пример #3
0
def make(game, state=retro.State.DEFAULT, discrete_actions=False, bk2dir=None):
    """Create a retro environment with frame skipping and a step limit.

    ``retro.make`` is tried with ``scenario='contest'`` first; if that
    raises any ``Exception`` it is called again without a scenario.

    Args:
        game: Game identifier forwarded to ``retro.make``.
        state: Initial state forwarded to ``retro.make``.
        discrete_actions: Use ``retro.Actions.DISCRETE`` rather than
            ``retro.Actions.FILTERED``.
        bk2dir: When truthy, passed to ``env.auto_record``.

    Returns:
        The emulator wrapped in ``StochasticFrameSkip`` (n=4,
        stickprob=0.25) and ``TimeLimit`` (4500 steps).
    """
    if discrete_actions:
        action_mode = retro.Actions.DISCRETE
    else:
        action_mode = retro.Actions.FILTERED

    try:
        emulator = retro.make(
            game, state,
            scenario='contest',
            use_restricted_actions=action_mode)
    except Exception:
        # Best-effort fallback: retry with retro's default scenario.
        emulator = retro.make(
            game, state, use_restricted_actions=action_mode)

    if bk2dir:
        emulator.auto_record(bk2dir)

    frame_skipped = retro_contest.StochasticFrameSkip(
        emulator, n=4, stickprob=0.25)
    return gym.wrappers.TimeLimit(frame_skipped, max_episode_steps=4500)
def make(game, state, discrete_actions=False, bk2dir=None, max_episode_steps=4000):
    """Make the competition environment.

    Prints the requested game and state, then builds the emulator. The
    ``'contest'`` scenario is tried first; if that raises any
    ``Exception`` the emulator is built without a scenario argument.

    Args:
        game: Game identifier forwarded to ``retro.make``.
        state: Initial state forwarded to ``retro.make``.
        discrete_actions: Use ``retro.ACTIONS_DISCRETE`` rather than
            ``retro.ACTIONS_FILTERED``.
        bk2dir: When truthy, passed to ``env.auto_record``.
        max_episode_steps: Limit given to ``gym.wrappers.TimeLimit``.

    Returns:
        The emulator wrapped in ``StochasticFrameSkip`` and ``TimeLimit``.
    """
    print('game:', game, 'state:', state)

    restricted = (retro.ACTIONS_DISCRETE
                  if discrete_actions else retro.ACTIONS_FILTERED)
    shared_kwargs = dict(use_restricted_actions=restricted)

    try:
        emulator = retro.make(game, state, scenario='contest', **shared_kwargs)
    except Exception:
        # Best-effort fallback: retry with retro's default scenario.
        emulator = retro.make(game, state, **shared_kwargs)

    if bk2dir:
        emulator.auto_record(bk2dir)

    return gym.wrappers.TimeLimit(
        retro_contest.StochasticFrameSkip(emulator, n=4, stickprob=0.25),
        max_episode_steps=max_episode_steps)
Пример #5
0
def make_env(game, state, stack=True, scale_rew=True):
    """
    Build an environment for ``game``/``state`` with the standard wrappers.

    Wrapper order, innermost first: ``StochasticFrameSkip``, ``TimeLimit``,
    ``SonicDiscretizer``, ``AllowBacktracking``, optional ``RewardScaler``,
    ``WarpFrame``, optional ``FrameStack`` of 4, ``EpisodeInfo``.

    Args:
        game: Forwarded to ``make``.
        state: Forwarded to ``make``.
        stack: Wrap with ``FrameStack(env, 4)`` when truthy.
        scale_rew: Wrap with ``RewardScaler`` when truthy.

    Returns:
        The fully wrapped environment.
    """
    base = make(game, state)

    # NOTE(review): if `make` already applies StochasticFrameSkip and
    # TimeLimit, both end up applied twice here -- confirm against the
    # `make` this module actually imports.
    frame_skipped = retro_contest.StochasticFrameSkip(
        base, n=4, stickprob=0.25)
    wrapped = gym.wrappers.TimeLimit(frame_skipped, max_episode_steps=4500)

    wrapped = SonicDiscretizer(wrapped)
    wrapped = AllowBacktracking(wrapped)
    if scale_rew:
        wrapped = RewardScaler(wrapped)

    wrapped = WarpFrame(wrapped)
    if stack:
        wrapped = FrameStack(wrapped, 4)

    return EpisodeInfo(wrapped)
Пример #6
0
def make(game, state=retro.STATE_DEFAULT, discrete_actions=False, bk2dir=None, monitordir=None, scenario='contest'):
    """Create a retro environment, optionally monitored, with a step limit.

    ``retro.make`` is tried with the given ``scenario`` first; if that
    raises any ``Exception`` it is called again without a scenario.

    Args:
        game: Game identifier forwarded to ``retro.make``.
        state: Initial state forwarded to ``retro.make``.
        discrete_actions: Use ``retro.ACTIONS_DISCRETE`` rather than
            ``retro.ACTIONS_FILTERED``.
        bk2dir: When truthy, passed to ``env.auto_record``.
        monitordir: When truthy, the environment is wrapped in
            ``retro_contest.Monitor`` with the paths ``monitor.csv`` and
            ``log.csv`` inside this directory.
        scenario: Scenario name used for the first ``retro.make`` attempt.

    Returns:
        The environment wrapped in ``StochasticFrameSkip`` (n=4,
        stickprob=0.25) and ``TimeLimit`` (4500 steps).
    """
    if discrete_actions:
        action_mode = retro.ACTIONS_DISCRETE
    else:
        action_mode = retro.ACTIONS_FILTERED

    try:
        env = retro.make(game, state, scenario=scenario,
                         use_restricted_actions=action_mode)
    except Exception:
        # Best-effort fallback: retry with retro's default scenario.
        env = retro.make(game, state, use_restricted_actions=action_mode)

    if bk2dir:
        env.auto_record(bk2dir)

    if monitordir:
        monitor_csv = os.path.join(monitordir, 'monitor.csv')
        log_csv = os.path.join(monitordir, 'log.csv')
        env = retro_contest.Monitor(env, monitor_csv, log_csv)

    frame_skipped = retro_contest.StochasticFrameSkip(
        env, n=4, stickprob=0.25)
    return gym.wrappers.TimeLimit(frame_skipped, max_episode_steps=4500)
def retro_make(game, state=retro.State.DEFAULT, discrete_actions=False, bk2dir=None, record='.'):
    """Create a retro environment using the ``'deep_thought'`` scenario.

    If the first ``retro.make`` call raises any ``Exception``, a marker
    line and the traceback are printed. The emulator is then created
    without the ``scenario`` and ``record`` arguments.

    Args:
        game: Game identifier; also the lookup key into ``game_wrappers``.
        state: Initial state forwarded to ``retro.make``.
        discrete_actions: Use ``retro.Actions.DISCRETE`` rather than
            ``retro.Actions.FILTERED``.
        bk2dir: When truthy, passed to ``env.auto_record``.
        record: Forwarded to the first ``retro.make`` attempt only.

    Returns:
        The frame-skipped environment wrapped by ``game_wrappers[game]``
        when ``game`` has an entry there, otherwise by ``TimeLimit`` with
        4500 steps.
    """
    action_mode = (retro.Actions.DISCRETE
                   if discrete_actions else retro.Actions.FILTERED)

    try:
        emulator = retro.make(
            game, state,
            scenario='deep_thought',
            record=record,
            use_restricted_actions=action_mode)
    except Exception:
        print('EXCEPTION in retro_make')
        print(traceback.format_exc())
        # The fallback drops both `scenario` and `record`.
        emulator = retro.make(
            game, state, use_restricted_actions=action_mode)

    if bk2dir:
        emulator.auto_record(bk2dir)

    frame_skipped = retro_contest.StochasticFrameSkip(
        emulator, n=4, stickprob=0.25)

    # A game-specific wrapper replaces the generic time limit.
    if game not in game_wrappers:
        return gym.wrappers.TimeLimit(frame_skipped, max_episode_steps=4500)
    return game_wrappers[game](frame_skipped)
Пример #8
0
def make_maml_env(game_states,
                  stack=2,
                  scale_rew=True,
                  color=False,
                  exp_type='x',
                  exp_const=0.002,
                  max_episode_steps=4500):
    """
    Build a ``RandomEnvironmen2``-based environment with standard wrappers.

    The base environment is created from the first ``(game, state)`` pair
    in ``game_states`` and handed, together with the whole collection, to
    ``RandomEnvironmen2``.

    Args:
        game_states: Indexable collection of ``(game, state)`` pairs.
        stack: Frame count for ``FrameStack``; applied only when ``> 1``.
        scale_rew: Wrap with ``RewardScaler`` when truthy.
        color: Forwarded to ``WarpFrame``.
        exp_type: ``'obs'`` selects ``ObsExplorationReward`` and ``'x'``
            selects ``XExplorationReward``. Any other value adds no
            exploration wrapper and raises no error.
        exp_const: Exploration coefficient; no exploration wrapper is
            added unless it is ``> 0``.
        max_episode_steps: Limit given to ``gym.wrappers.TimeLimit``.

    Returns:
        The wrapped environment, with the ``sample`` attribute of the
        ``RandomEnvironmen2`` instance attached as ``env.sample``.
    """
    first_game, first_state = game_states[0]
    base = make(first_game, first_state)

    env_rand = RandomEnvironmen2(base, game_states)
    wrapped = retro_contest.StochasticFrameSkip(
        env_rand, n=4, stickprob=0.25)

    wrapped = BackupOriginalData(wrapped)
    wrapped = gym.wrappers.TimeLimit(
        wrapped, max_episode_steps=max_episode_steps)

    wrapped = AllowBacktracking(SonicDiscretizer(wrapped))

    if scale_rew:
        wrapped = RewardScaler(wrapped)

    wrapped = WarpFrame(wrapped, color)

    explore = exp_const > 0
    if explore and exp_type == 'obs':
        wrapped = ObsExplorationReward(wrapped, exp_const, game_specific=True)
    elif explore and exp_type == 'x':
        wrapped = XExplorationReward(wrapped, exp_const, game_specific=True)

    if stack > 1:
        wrapped = FrameStack(wrapped, stack)

    wrapped = EpisodeInfo(ScaledFloatFrame(wrapped))

    # Expose the random-environment sampler on the outermost wrapper.
    wrapped.sample = env_rand.sample

    return wrapped
Пример #9
0
def make(game,
         state=retro.State.DEFAULT,
         discrete_actions=False,
         bk2dir=None,
         monitordir=None,
         scenario='scenario'):
    """Create a retro environment, optionally monitored, with a step limit.

    ``retro.make`` is tried with the given ``scenario`` first; if that
    raises any ``Exception`` it is called again without a scenario.

    Args:
        game: Game identifier forwarded to ``retro.make``.
        state: Initial state forwarded to ``retro.make``.
        discrete_actions: Use ``retro.Actions.DISCRETE`` rather than
            ``retro.Actions.FILTERED``.
        bk2dir: When truthy, passed to ``env.auto_record``.
        monitordir: When truthy, the environment is wrapped in
            ``retro_contest.Monitor`` with ``monitor_<t>.csv`` and
            ``log_<t>.csv`` paths inside this directory, where ``<t>`` is
            the current Unix time truncated to an integer.
        scenario: Scenario name used for the first ``retro.make`` attempt.

    Returns:
        The environment wrapped in ``StochasticFrameSkip`` (n=4,
        stickprob=0.0) and ``TimeLimit`` (8000 steps).
    """
    action_mode = (retro.Actions.DISCRETE
                   if discrete_actions else retro.Actions.FILTERED)
    make_kwargs = {'use_restricted_actions': action_mode}

    try:
        env = retro.make(game, state, scenario=scenario, **make_kwargs)
    except Exception:
        # Best-effort fallback: retry with retro's default scenario.
        env = retro.make(game, state, **make_kwargs)

    if bk2dir:
        env.auto_record(bk2dir)

    if monitordir:
        # Timestamped file names keep runs that share a directory apart.
        stamp = int(time.time())
        monitor_csv = os.path.join(monitordir, 'monitor_{}.csv'.format(stamp))
        log_csv = os.path.join(monitordir, 'log_{}.csv'.format(stamp))
        env = retro_contest.Monitor(env, monitor_csv, log_csv)

    # Frame-skip settings were tuned per game by the original author. The
    # active values are the ones labelled "contra". Alternatives noted were
    # n=6, stickprob=0.0 for "bust a move" and n=4, stickprob=0.25 for
    # "sonic".
    frame_skipped = retro_contest.StochasticFrameSkip(
        env, n=4, stickprob=0.0)
    return gym.wrappers.TimeLimit(frame_skipped, max_episode_steps=8000)