Example #1
    def _thunk():
        if "mario" in env_id:
            env = gym_super_mario_bros.make('SuperMarioBros-v0')
            env = JoypadSpace(env, SIMPLE_MOVEMENT)
        elif env_id.startswith("dm"):
            _, domain, task = env_id.split('.')
            env = dm_control2gym.make(domain_name=domain, task_name=task)
        else:
            env = gym.make(env_id)
        is_atari = hasattr(gym.envs, 'atari') and isinstance(
            env.unwrapped, gym.envs.atari.atari_env.AtariEnv)
        if is_atari:
            # make_atari rebuilds the env with baselines' standard Atari wrappers
            env = make_atari(env_id)
        env.seed(seed + rank)
        if log_dir is not None:
            env = bench.Monitor(env, os.path.join(log_dir, str(rank)))
        if is_atari:
            env = wrap_deepmind(env, clip_rewards=False)
        # If the observation has shape (H, W, 1) or (H, W, 3), wrap for PyTorch convolutions
        obs_shape = env.observation_space.shape
        if "mario" in env_id:
            env = wrap_deepmind(env, clip_rewards=False, episode_life=False)
            env = WrapPyTorchMario(env)
        elif len(obs_shape) == 3 and obs_shape[2] in [1, 3]:
            env = WrapPyTorch(env)

        return env
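
Such a thunk is typically returned from an outer factory and handed to a vectorized-environment runner. A minimal usage sketch, assuming the baselines-style SubprocVecEnv; `make_env` is a hypothetical factory that binds env_id, seed, rank and log_dir into the `_thunk` above:

from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv

# `make_env(env_id, seed, rank, log_dir)` is assumed to return the
# `_thunk` closure shown above.
num_processes = 8
envs = SubprocVecEnv([
    make_env(env_id, seed, rank, log_dir) for rank in range(num_processes)
])
obs = envs.reset()  # batched observations, one per worker process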
Example #2
 def from_args(cls, args, seed, **kwargs):
     env = gym_super_mario_bros.make(args.env)
     env = JoypadSpace(env, [["right"], ["right", "A"]])
     env = SkipFrame(env, skip=args.skip_rate)
     env._max_episode_steps = args.max_episode_length  # note: set on the outermost wrapper, not the inner TimeLimit
     env.seed(seed)
     return cls(
         env, args.frame_stack, args.skip_rate, args.max_episode_length
     )
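
SkipFrame here is project-specific, not part of gym. A common implementation of such a frame-skipping wrapper looks roughly like this sketch (the real one may also max-pool consecutive frames or differ in details):

import gym

class SkipFrame(gym.Wrapper):
    """Sketch: repeat each action `skip` times and sum the rewards."""

    def __init__(self, env, skip):
        super().__init__(env)
        self._skip = skip

    def step(self, action):
        total_reward = 0.0
        obs, done, info = None, False, {}
        for _ in range(self._skip):
            obs, reward, done, info = self.env.step(action)
            total_reward += reward
            if done:
                break
        return obs, total_reward, done, info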
Example #3
 def fn():
     env = gym.make(env_id)
     if 'SuperMarioBros' in env_id:
         env = JoypadSpace(env, actions)
         env = ReshapeReward(env, monitor=None)
         env = SkipObs(env)
     env = SingleEnv(env)
     env.seed(seed + rank)
     env.action_space.seed(seed + rank)
     return env
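
ReshapeReward, SkipObs and SingleEnv are project wrappers, not gym built-ins. As an illustration only, a reward-shaping wrapper in the spirit of ReshapeReward could be built on gym.Wrapper like this; the shaping logic and the monitor hook are assumptions, not the source's:

import gym
import numpy as np

class ReshapeReward(gym.Wrapper):
    """Sketch: rescale and clip rewards; the real wrapper's logic may differ."""

    def __init__(self, env, monitor=None):
        super().__init__(env)
        self.monitor = monitor  # optional recorder (hypothetical hook)

    def step(self, action):
        obs, reward, done, info = self.env.step(action)
        if self.monitor is not None:
            self.monitor.record(obs)  # hypothetical recorder call
        return obs, np.clip(reward / 10.0, -1.0, 1.0), done, info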
Example #4
    def create(self, env_id, seed=None):
        try:
            env = gym.make(env_id)
            env = JoypadSpace(env, SIMPLE_MOVEMENT)
            if seed is not None:  # `if seed:` would silently skip a legitimate seed of 0
                env.seed(seed)
        except gym.error.Error:
            raise InvalidUsage("Attempted to look up malformed environment ID '{}'".format(env_id))

        instance_id = str(uuid.uuid4().hex)[:self.id_len]
        self.envs[instance_id] = env
        return instance_id
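
This method belongs to a server-side registry that keeps `self.envs` keyed by short UUIDs, much like gym-http-api's environment server. A hypothetical usage sketch, with the owning class name assumed:

# `Envs` is a hypothetical name for the class owning `create`;
# it is assumed to hold `self.envs = {}` and `self.id_len` (e.g. 8).
registry = Envs()
instance_id = registry.create('SuperMarioBros-v0', seed=42)
env = registry.envs[instance_id]
obs = env.reset()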
Example #5
    def wrap_():
        env = gym_super_mario_bros.make(env_name)
        env = JoypadSpace(env, [["right"], ["right", "A"]])
        env.seed(seed)
        env = Monitor(env, './')
        env = MaxAndSkip(env, skip=4)
        env = ProcessFrame84(env)
        env = ImageToPytorch(env)

        env = FrameStack(env, 4)

        env = ClipReward(env)
        return env
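
Given ProcessFrame84, ImageToPytorch and FrameStack(env, 4), the observations should come out as 4 stacked 84x84 channel-first frames. A quick sanity check, assuming these wrappers behave like their common Atari counterparts:

import numpy as np

env = wrap_()
obs = env.reset()
# FrameStack usually returns a LazyFrames object; np.asarray materializes it.
print(np.asarray(obs).shape)  # expected: (4, 84, 84)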
Example #6
def make_env():
    env = gym_super_mario_bros.make('SuperMarioBros-v3')
    env = JoypadSpace(env, RIGHT_ONLY)
    env = CustomRewardAndDoneEnv(env)  # customize the reward and episode-done signals
    env = StochasticFrameSkip(env, n=4, stickprob=0.25)  # sticky frame skip
    env = Downsample(env, 2)  # downsampling
    env = FrameStack(env, 4)  # frame stacking
    env = ScaledFloatFrame(env)  # normalize observations
    env = Monitor(env, log_dir, allow_early_resets=True)
    env.seed(0)  # set the seed
    set_global_seeds(0)
    env = DummyVecEnv([lambda: env])  # create the vectorized environment

    print('action space: ', env.action_space)
    print('observation space: ', env.observation_space)

    return env
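
DummyVecEnv and set_global_seeds point at stable-baselines (v2). Under that assumption, a minimal training sketch that consumes make_env:

from stable_baselines import PPO2

env = make_env()
model = PPO2('CnnPolicy', env, verbose=1)  # CnnPolicy suits stacked image frames
model.learn(total_timesteps=100000)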
Example #7
def make_env(cfg):
    """Helper function to create dm_control environment"""
    if cfg.env == 'ball_in_cup_catch':
        domain_name = 'ball_in_cup'
        task_name = 'catch'
    elif cfg.env == 'point_mass_easy':
        domain_name = 'point_mass'
        task_name = 'easy'
    else:
        domain_name = cfg.env.split('_')[0]
        task_name = '_'.join(cfg.env.split('_')[1:])

    # per dreamer: https://github.com/danijar/dreamer/blob/02f0210f5991c7710826ca7881f19c64a012290c/wrappers.py#L26
    camera_id = 2 if domain_name == 'quadruped' else 0

#     env = dmc2gym.make(domain_name=domain_name,
#                        task_name=task_name,
#                        seed=cfg.seed,
#                        visualize_reward=False,
#                        from_pixels=True,
#                        height=cfg.image_size,
#                        width=cfg.image_size,
#                        frame_skip=cfg.action_repeat,
#                        camera_id=camera_id)
    # env = gym.make("CarRacing-v0")
    env_ = gym_tetris.make('TetrisA-v0')
    env = JoypadSpace(env_, SIMPLE_MOVEMENT)
    # env = MaxAndSkipEnv(env)
    # env._max_episode_steps = env_._max_episode_steps
    max_episode_steps = 10000
    env = WrapPyTorch(env, max_episode_steps)
    env.seed(cfg.seed)
    # print(env.ram)
    obs = env.reset()
    print(obs.shape)
    # env.seed(cfg.seed)

    env = utils.FrameStack(env, k=cfg.frame_stack)
    print("Init env done")
    # assert env.action_space.low.min() >= -1
    # assert env.action_space.high.max() <= 1

    return env
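
WrapPyTorch is project-specific. A plausible sketch, assuming it transposes HWC image observations to CHW for PyTorch convolutions and carries a step limit; the real wrapper may differ:

import gym
import numpy as np

class WrapPyTorch(gym.ObservationWrapper):
    """Sketch: HWC -> CHW observations plus a manual episode-step cap."""

    def __init__(self, env, max_episode_steps):
        super().__init__(env)
        h, w, c = env.observation_space.shape
        self.observation_space = gym.spaces.Box(
            low=0, high=255, shape=(c, h, w), dtype=np.uint8)
        self._max_episode_steps = max_episode_steps

    def observation(self, observation):
        return np.asarray(observation).transpose(2, 0, 1)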
Example #8
            def _thunk():
                mario_env = JoypadSpace(SuperMario_Env(world, stage, version),
                                        movement)

                if wrap_atari:
                    mario_env._max_episode_steps = max_episode_steps * 4
                    mario_env = StickyActionEnv(mario_env)
                    mario_env = MaxAndSkipEnv(mario_env, skip=4)
                    mario_env = DummyMontezumaInfoWrapper(mario_env)
                    mario_env = AddRandomStateToInfo(mario_env)
                # mario_env.seed(seed + rank)

                mario_env = Monitor(
                    mario_env,
                    logger.get_dir()
                    and os.path.join(logger.get_dir(), str(rank)),
                    allow_early_resets=True)
                if wrap_atari:
                    mario_env = wrap_deepmind(mario_env)
                    mario_env = BlocksWrapper(mario_env)

                mario_env.seed(seed)

                return mario_env
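
StickyActionEnv, DummyMontezumaInfoWrapper and AddRandomStateToInfo come from the large-scale-curiosity style of codebase. A sketch of the sticky-action idea (repeat the previous action with some probability); the original may differ in its defaults:

import gym
import numpy as np

class StickyActionEnv(gym.Wrapper):
    """Sketch: with probability p, ignore the new action and repeat the last one."""

    def __init__(self, env, p=0.25):
        super().__init__(env)
        self.p = p
        self.last_action = 0

    def reset(self, **kwargs):
        self.last_action = 0
        return self.env.reset(**kwargs)

    def step(self, action):
        if np.random.uniform() < self.p:
            action = self.last_action
        self.last_action = action
        return self.env.step(action)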
Example #9

import gym
import numpy as np
from keras.models import Sequential
from keras.layers import Flatten, Reshape, Conv2D, MaxPooling2D
from nes_py.wrappers import JoypadSpace
# Importing from the Contra package also registers 'Contra-v0' with gym
from Contra.actions import SIMPLE_MOVEMENT, COMPLEX_MOVEMENT, RIGHT_ONLY

ENV_NAME = 'Contra-v0'

CUSTOM_MOVEMENT = [
    ['right'],
    ['right', 'A'],
    ['right', 'B'],
    ['right', 'A', 'B'],
]

env = gym.make(ENV_NAME)
env = JoypadSpace(env, RIGHT_ONLY)
np.random.seed(120)
env.seed(120)
nb_actions = env.action_space.n

print(env.observation_space.shape)
print(env)

model = Sequential()
model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
model.add(Reshape(env.observation_space.shape))
model.add(
    Conv2D(32, (3, 3),
           activation='relu',
           input_shape=env.observation_space.shape))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(Flatten())
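
The snippet stops before the model gets a head or an agent; the Sequential/Conv2D stack suggests keras-rl. A hedged continuation in the usual keras-rl style; the layer sizes, memory limit, and learning rate are assumptions, not taken from the source:

from keras.layers import Dense
from keras.optimizers import Adam
from rl.agents.dqn import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import EpsGreedyQPolicy

# Assumed Q-value head and agent wiring.
model.add(Dense(512, activation='relu'))
model.add(Dense(nb_actions, activation='linear'))

memory = SequentialMemory(limit=50000, window_length=1)
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory,
               nb_steps_warmup=1000, policy=EpsGreedyQPolicy())
dqn.compile(Adam(lr=1e-4), metrics=['mae'])
dqn.fit(env, nb_steps=50000, verbose=2)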
Example #10

ENV_NAME = 'CustomContra-v2'
# Get the environment and extract the number of actions.
env = gym.make(ENV_NAME)
env = JoypadSpace(env, CUSTOM_MOVEMENT)
np.random.seed(123)
env.seed(123)
nb_actions = env.action_space.n
print(nb_actions)
print(env.observation_space.shape)
obs_dim = env.observation_space.shape[0]

# Next, we build a very simple model.
model = Sequential()
model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
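
'CustomContra-v2' is not a stock gym id, so the source presumably registers it elsewhere. A sketch of such a registration; the entry_point module path is entirely hypothetical:

from gym.envs.registration import register

register(
    id='CustomContra-v2',
    entry_point='custom_contra.env:CustomContraEnv',  # hypothetical module path
    max_episode_steps=10000,
)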
Example #11

print(device)  # `device` is defined earlier in the source file (e.g. torch.device('cuda'))

Tensor = torch.Tensor
LongTensor = torch.LongTensor

env = gym_super_mario_bros.make('SuperMarioBros-v0')
#env = BinarySpaceToDiscreteSpaceEnv(env, SIMPLE_MOVEMENT)
env = JoypadSpace(env, SIMPLE_MOVEMENT)

#directory = './MarioVideos/'
directory = './MarioVideosLong/'
env = gym.wrappers.Monitor(
    env, directory, video_callable=lambda episode_id: episode_id % 5000 == 0)

seed_value = 23
env.seed(seed_value)
torch.manual_seed(seed_value)
random.seed(seed_value)

###### PARAMS ######
learning_rate = 0.0001
#num_episodes = 5000
num_episodes = 9999999999
startNum = 500
#newModel = False
newModel = False

gamma = 0.99

hidden_layer = 512
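
The hyperparameters (learning_rate, gamma, hidden_layer = 512) imply a DQN-style setup. A minimal sketch of a Q-network and optimizer consistent with them; the architecture and input size are assumptions, not the source's:

import torch
import torch.nn as nn
import torch.optim as optim

class QNetwork(nn.Module):
    """Sketch: MLP Q-network sized by `hidden_layer`; the source's net may be a CNN."""

    def __init__(self, obs_size, n_actions, hidden=hidden_layer):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(obs_size, hidden),
            nn.ReLU(),
            nn.Linear(hidden, n_actions),
        )

    def forward(self, x):
        return self.net(x)

q_net = QNetwork(obs_size=84 * 84 * 4, n_actions=env.action_space.n)  # input size assumed
optimizer = optim.Adam(q_net.parameters(), lr=learning_rate)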
Example #12
def run_agent(agent, rendering=False, monitoring=False, print_reward=False):

    env = gym_super_mario_bros.make("SuperMarioBros-v0")
    env = JoypadSpace(env, SIMPLE_MOVEMENT)
    env.seed(42)

    if monitoring:
        env = Monitor(env, './video', force=True)
    agent.eval()

    state = env.reset()
    if rendering:
        env.render()

    #Conv2d without flatten()
    state = convert_image(state)  #.flatten()
    state_list = [state, state, state, state]
    position = -1
    stuck = 0  # consecutive steps without horizontal progress

    global_reward = 0
    s = 0
    for _ in range(10000):
        #Conv2d input
        input = torch.from_numpy(np.array(state_list)).type('torch.FloatTensor')\
            .unsqueeze(0)

        #Linear input
        #input = torch.tensor(state_list).type("torch.FloatTensor").view(1,-1)

        output_probabilities = agent(input).detach().numpy()[0]
        # `action_count` is defined in the surrounding module (the size of the action space)
        action = np.random.choice(range(action_count), 1, \
            p=output_probabilities).item()
        new_state, reward, done, info = env.step(action)
        global_reward += reward

        s = s + 1
        if rendering:
            env.render()

        state_list.pop()
        #Conv2d without flatten()
        state_list.append(convert_image(new_state))  #.flatten())

        # If Mario makes no horizontal progress for 100 steps, punish him and stop
        if position == info["x_pos"]:
            stuck += 1
            if stuck == 100:
                global_reward -= 100
                break
        else:
            stuck = 0

        position = info["x_pos"]
        #env.render()
        #Mario died
        if info["life"] < 2:
            break
    if print_reward:
        print(global_reward)

    return global_reward
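
run_agent expects a PyTorch module that maps a stacked-frame tensor to a probability distribution over action_count actions (convert_image and action_count live in the surrounding module). A hypothetical call:

# `PolicyNet` is hypothetical: any nn.Module mapping a (1, 4, H, W)
# frame stack to `action_count` softmax probabilities.
agent = PolicyNet()
agent.load_state_dict(torch.load('mario_policy.pt'))  # hypothetical checkpoint
total_reward = run_agent(agent, rendering=True, print_reward=True)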