def _thunk():
    """Closure: build one seeded, monitored environment for `env_id`.

    Relies on enclosing-scope variables: env_id, seed, rank, log_dir.
    Dispatches between Super Mario Bros, dm_control ("dm.<domain>.<task>"),
    and plain gym environments, then applies the matching preprocessing stack.
    """
    if "mario" in env_id:
        env = gym_super_mario_bros.make('SuperMarioBros-v0')
        env = JoypadSpace(env, SIMPLE_MOVEMENT)
    elif env_id.startswith("dm"):
        # env_id is of the form "dm.<domain>.<task>"
        _, domain, task = env_id.split('.')
        env = dm_control2gym.make(domain_name=domain, task_name=task)
    else:
        env = gym.make(env_id)
    is_atari = hasattr(gym.envs, 'atari') and isinstance(
        env.unwrapped, gym.envs.atari.atari_env.AtariEnv)
    if is_atari:
        # Recreate via make_atari to get the standard Atari preprocessing.
        env = make_atari(env_id)
    # Each worker gets a distinct seed so rollouts are decorrelated.
    env.seed(seed + rank)
    if log_dir is not None:
        env = bench.Monitor(env, os.path.join(log_dir, str(rank)))
    if is_atari:
        env = wrap_deepmind(env, clip_rewards=False)
    # If the input has shape (W,H,3), wrap for PyTorch convolutions
    obs_shape = env.observation_space.shape
    if "mario" in env_id:
        env = wrap_deepmind(env, clip_rewards=False, episode_life=False)
        env = WrapPyTorchMario(env)
    elif len(obs_shape) == 3 and obs_shape[2] in [1, 3]:
        env = WrapPyTorch(env)
    return env
def from_args(cls, args, seed, **kwargs):
    """Alternate constructor: build a seeded Mario environment from parsed args.

    Args:
        args: namespace with env, skip_rate, max_episode_length, frame_stack.
        seed: RNG seed applied to the environment.
        **kwargs: accepted for interface compatibility; unused here.
    """
    # Restrict the action space to running right and right+jump.
    wrapped = JoypadSpace(
        gym_super_mario_bros.make(args.env),
        [["right"], ["right", "A"]],
    )
    wrapped = SkipFrame(wrapped, skip=args.skip_rate)
    wrapped._max_episode_steps = args.max_episode_length
    wrapped.seed(seed)
    return cls(
        wrapped, args.frame_stack, args.skip_rate, args.max_episode_length
    )
def fn():
    """Closure: create a single env instance, seeded by worker rank.

    Uses enclosing-scope variables: env_id, actions, seed, rank.
    """
    built = gym.make(env_id)
    # Mario needs the discrete-action + reward/observation shaping stack.
    if 'SuperMarioBros' in env_id:
        built = SkipObs(
            ReshapeReward(JoypadSpace(built, actions), monitor=None)
        )
    built = SingleEnv(built)
    rank_seed = seed + rank
    built.seed(rank_seed)
    built.action_space.seed(rank_seed)
    return built
def create(self, env_id, seed=None):
    """Instantiate `env_id`, wrap it with SIMPLE_MOVEMENT controls, and register it.

    Args:
        env_id: gym environment id string.
        seed: optional RNG seed; applied whenever given, including 0.

    Returns:
        A short hex instance id, the key under which the env is stored
        in self.envs.

    Raises:
        InvalidUsage: if `env_id` is not a valid gym environment id.
    """
    try:
        env = gym.make(env_id)
        env = JoypadSpace(env, SIMPLE_MOVEMENT)
        # BUG FIX: `if seed:` silently ignored a legitimate seed of 0
        # (falsy). Compare against None so every provided seed is applied.
        if seed is not None:
            env.seed(seed)
    except gym.error.Error:
        raise InvalidUsage("Attempted to look up malformed environment ID '{}'".format(env_id))
    instance_id = str(uuid.uuid4().hex)[:self.id_len]
    self.envs[instance_id] = env
    return instance_id
def wrap_():
    """Assemble the DeepMind-style preprocessing stack around a Mario env.

    Uses enclosing-scope variables: env_name, seed.
    """
    base = gym_super_mario_bros.make(env_name)
    base = JoypadSpace(base, [["right"], ["right", "A"]])
    base.seed(seed)
    # Order matters: monitor the raw env first, then skip frames, resize,
    # transpose for PyTorch, stack frames, and finally clip rewards.
    stages = (
        lambda e: Monitor(e, './'),
        lambda e: MaxAndSkip(e, skip=4),
        ProcessFrame84,
        ImageToPytorch,
        lambda e: FrameStack(e, 4),
        ClipReward,
    )
    for stage in stages:
        base = stage(base)
    return base
def make_env():
    """Build the preprocessed, monitored, vectorized Mario environment.

    Uses enclosing-scope variables: log_dir. Seeds are fixed at 0 so runs
    are reproducible.
    """
    m_env = gym_super_mario_bros.make('SuperMarioBros-v3')
    m_env = JoypadSpace(m_env, RIGHT_ONLY)
    m_env = CustomRewardAndDoneEnv(m_env)  # custom reward / episode-done rules
    m_env = StochasticFrameSkip(m_env, n=4, stickprob=0.25)  # sticky frame skip
    m_env = Downsample(m_env, 2)           # spatial downsampling
    m_env = FrameStack(m_env, 4)           # stack the last 4 frames
    m_env = ScaledFloatFrame(m_env)        # normalize observations
    m_env = Monitor(m_env, log_dir, allow_early_resets=True)
    m_env.seed(0)                          # fix the environment seed
    set_global_seeds(0)
    # Wrap as a single-env vectorized environment; the lambda is invoked
    # at construction time, before m_env is rebound below.
    m_env = DummyVecEnv([lambda: m_env])
    print('行動空間: ', m_env.action_space)
    print('状態空間: ', m_env.observation_space)
    return m_env
def make_env(cfg):
    """Helper function to create dm_control environment.

    NOTE(review): despite the name/docstring, the dm_control path is fully
    commented out below — this currently builds a Tetris (gym_tetris) env
    with SIMPLE_MOVEMENT controls, PyTorch wrapping, and frame stacking.
    The domain/task parsing is kept for when the dmc2gym path is restored.
    """
    if cfg.env == 'ball_in_cup_catch':
        domain_name = 'ball_in_cup'
        task_name = 'catch'
    elif cfg.env == 'point_mass_easy':
        domain_name = 'point_mass'
        task_name = 'easy'
    else:
        # Generic "<domain>_<task_with_underscores>" parsing.
        domain_name = cfg.env.split('_')[0]
        task_name = '_'.join(cfg.env.split('_')[1:])
    # per dreamer: https://github.com/danijar/dreamer/blob/02f0210f5991c7710826ca7881f19c64a012290c/wrappers.py#L26
    camera_id = 2 if domain_name == 'quadruped' else 0
    # env = dmc2gym.make(domain_name=domain_name,
    #                    task_name=task_name,
    #                    seed=cfg.seed,
    #                    visualize_reward=False,
    #                    from_pixels=True,
    #                    height=cfg.image_size,
    #                    width=cfg.image_size,
    #                    frame_skip=cfg.action_repeat,
    #                    camera_id=camera_id)
    # env = gym.make("CarRacing-v0")
    env_ = gym_tetris.make('TetrisA-v0')
    env = JoypadSpace(env_, SIMPLE_MOVEMENT)
    # env = MaxAndSkipEnv(env)
    # env._max_episode_steps = env_._max_episode_steps
    max_episode_steps = 10000
    env = WrapPyTorch(env, max_episode_steps)
    env.seed(cfg.seed)
    # print(env.ram)
    obs = env.reset()
    print(obs.shape)
    # env.seed(cfg.seed)
    env = utils.FrameStack(env, k=cfg.frame_stack)
    print("Init env done")
    # assert env.action_space.low.min() >= -1
    # assert env.action_space.high.max() <= 1
    return env
def _thunk():
    """Closure: build one Mario env for worker `rank`, optionally with the
    Atari-style wrapper stack (sticky actions, frame skip, deepmind wrap).

    Uses enclosing-scope variables: world, stage, version, movement,
    wrap_atari, max_episode_steps, seed, rank.

    NOTE(review): the grouping of statements under the two `if wrap_atari:`
    branches was reconstructed from collapsed source — confirm against the
    original formatting.
    """
    mario_env = JoypadSpace(SuperMario_Env(world, stage, version), movement)
    if wrap_atari:
        # *4 compensates for MaxAndSkipEnv consuming 4 raw frames per step.
        mario_env._max_episode_steps = max_episode_steps * 4
        mario_env = StickyActionEnv(mario_env)
        mario_env = MaxAndSkipEnv(mario_env, skip=4)
        mario_env = DummyMontezumaInfoWrapper(mario_env)
        mario_env = AddRandomStateToInfo(mario_env)
    # mario_env.seed(seed + rank)
    mario_env = Monitor(
        mario_env,
        # `and` short-circuits: no log path when logger.get_dir() is falsy.
        logger.get_dir() and os.path.join(logger.get_dir(), str(rank)),
        allow_early_resets=True)
    if wrap_atari:
        mario_env = wrap_deepmind(mario_env)
        mario_env = BlocksWrapper(mario_env)
    mario_env.seed(seed)
    return mario_env
from nes_py.wrappers import JoypadSpace
from Contra.actions import SIMPLE_MOVEMENT, COMPLEX_MOVEMENT, RIGHT_ONLY

ENV_NAME = 'Contra-v0'
# Minimal right-moving action set (defined here but RIGHT_ONLY is used below).
CUSTOM_MOVEMENT = [
    ['right'],
    ['right', 'A'],
    ['right', 'B'],
    ['right', 'A', 'B'],
]

# Build the Contra environment with a reduced discrete action space.
env = gym.make(ENV_NAME)
env = JoypadSpace(env, RIGHT_ONLY)
np.random.seed(120)
env.seed(120)
nb_actions = env.action_space.n
print(env.observation_space.shape)
print(env)

# Convolutional Q-network over raw screen frames. The Flatten/Reshape pair
# strips the leading window dimension of shape (1,) added by the agent's
# input before the Conv2D layers see the image.
model = Sequential()
model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
model.add(Reshape(env.observation_space.shape))
model.add(
    Conv2D(32, (3, 3),
           activation='relu',
           input_shape=env.observation_space.shape))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(Flatten())
# for idx, key in enumerate(keys): # axarr[idx].plot(episodes, data[key]) # axarr[idx].set_ylabel(key) # plt.xlabel('episodes') # plt.tight_layout() # if output is None: # plt.show() # else: # plt.savefig(output) ENV_NAME = 'CustomContra-v2' # Get the environment and extract the number of actions. env = gym.make(ENV_NAME) env = JoypadSpace(env, CUSTOM_MOVEMENT) np.random.seed(123) env.seed(123) nb_actions = env.action_space.n print(nb_actions) print(env.observation_space.shape) obs_dim = env.observation_space.shape[0] # Next, we build a very simple model. model = Sequential() model.add(Flatten(input_shape=(1, ) + env.observation_space.shape)) model.add(Dense(16)) model.add(Activation('relu')) model.add(Dense(16)) model.add(Activation('relu')) model.add(Dense(16)) model.add(Activation('relu')) model.add(Dense(nb_actions))
print(device)

# Convenience aliases for tensor constructors.
Tensor = torch.Tensor
LongTensor = torch.LongTensor

env = gym_super_mario_bros.make('SuperMarioBros-v0')
#env = BinarySpaceToDiscreteSpaceEnv(env, SIMPLE_MOVEMENT)
env = JoypadSpace(env, SIMPLE_MOVEMENT)
#directory = './MarioVideos/'
directory = './MarioVideosLong/'
# Record a video only every 5000th episode to keep disk usage down.
env = gym.wrappers.Monitor(
    env, directory,
    video_callable=lambda episode_id: episode_id % 5000 == 0)

# Seed env, torch, and python RNGs for reproducibility.
seed_value = 23
env.seed(seed_value)
torch.manual_seed(seed_value)
random.seed(seed_value)

###### PARAMS ######
learning_rate = 0.0001
#num_episodes = 5000
num_episodes = 9999999999  # effectively run until interrupted
startNum = 500  # presumably the checkpoint/episode number to resume from — verify against loader
#newModel = False
newModel = False  # False: load an existing model instead of training fresh
gamma = 0.99  # discount factor
hidden_layer = 512
def run_agent(agent, rendering=False, monitoring=False, print_reward=False):
    """Roll out `agent` for one episode of SuperMarioBros-v0 and return the reward.

    Args:
        agent: policy network mapping a stacked-frame tensor to action
            probabilities (called in eval mode, sampled stochastically).
        rendering: render each frame to screen.
        monitoring: record a video to ./video.
        print_reward: print the accumulated reward before returning.

    Returns:
        The accumulated episode reward; includes a -100 penalty if Mario's
        x-position was unchanged for 100 consecutive steps, which also ends
        the episode early. The episode also ends when Mario loses a life.
    """
    env = gym_super_mario_bros.make("SuperMarioBros-v0")
    env = JoypadSpace(env, SIMPLE_MOVEMENT)
    env.seed(42)
    if monitoring:
        env = Monitor(env, './video', force=True)
    agent.eval()
    state = env.reset()
    if rendering:
        env.render()
    # State is the last 4 preprocessed frames, stacked for the Conv2d input.
    state = convert_image(state)
    state_list = [state, state, state, state]
    position = -1
    global_reward = 0
    # BUG FIX: `stuck` was only ever assigned inside the loop body, so the
    # first time `position == info["x_pos"]` held, `stuck += 1` would raise
    # UnboundLocalError. Initialize it before the loop.
    stuck = 0
    for _ in range(10000):
        # Renamed from `input` to avoid shadowing the builtin.
        net_input = torch.from_numpy(np.array(state_list)) \
            .type('torch.FloatTensor').unsqueeze(0)
        output_probabilities = agent(net_input).detach().numpy()[0]
        action = np.random.choice(range(action_count), 1,
                                  p=output_probabilities).item()
        new_state, reward, done, info = env.step(action)
        global_reward += reward
        if rendering:
            env.render()
        # Slide the frame window: drop the oldest, append the newest.
        state_list.pop()
        state_list.append(convert_image(new_state))
        # If Mario makes no horizontal progress for 100 steps, punish and stop.
        if position == info["x_pos"]:
            stuck += 1
            if stuck == 100:
                global_reward -= 100
                break
        else:
            stuck = 0
        position = info["x_pos"]
        # Mario died (lost a life).
        if info["life"] < 2:
            break
    if print_reward:
        print(global_reward)
    return global_reward