def setup_env(env_id: str, monitor_dir: str = None) -> gym.Env:
    """
    Make an environment and set it up with wrappers.

    Args:
        env_id: the id for the environment to load
        monitor_dir: the output directory to route monitor output to

    Returns:
        a loaded and wrapped OpenAI Gym environment

    """
    if 'Tetris' in env_id:
        import gym_tetris
        env = gym_tetris.make(env_id)
        env = gym_tetris.wrap(env, clip_rewards=False)
    elif 'SuperMarioBros' in env_id:
        import gym_super_mario_bros
        env = gym_super_mario_bros.make(env_id)
        env = BinarySpaceToDiscreteSpaceEnv(env, SIMPLE_MOVEMENT)
        env = nes_py_wrap(env)
    else:
        env = build_atari_environment(env_id)
    if monitor_dir is not None:
        env = gym.wrappers.Monitor(env, monitor_dir, force=True)
    return env
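# A minimal usage sketch for setup_env (an illustrative addition, assuming
# gym_tetris is installed and the wrapper helpers above are importable):
# run one random episode with monitor output routed to ./monitor.
env = setup_env('TetrisA-v0', monitor_dir='monitor')
state = env.reset()
done = False
while not done:
    state, reward, done, info = env.step(env.action_space.sample())
env.close()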
def __init__(self, env, seed, max_episode_length, action_repeat, bit_depth, args):
    from nes_py.wrappers import JoypadSpace
    import gym_tetris
    from gym_tetris.actions import SIMPLE_MOVEMENT
    self._env = gym_tetris.make(env, skip_level=True)
    self._env.seed(seed)
    self._env = JoypadSpace(self._env, SIMPLE_MOVEMENT)
    self.max_episode_length = max_episode_length
    self.action_repeat = action_repeat
    self.bit_depth = bit_depth
    self.small_image = args.small_image
    self.add_reward = args.add_reward
    self.typeb = "1" in env  # "-v1" env ids use the smaller shaping scales below
    self.acc = 0.03 if self.typeb else 3
    self.living = 0.003 if self.typeb else 0.3
    self.dim = 1 if args.binary_image else 3
    if args.binary_image:
        self._process_obs = _images_to_observation_binary
    else:
        self._process_obs = _images_to_observation
    self.one_skip = False
    # zero out the reward shaping terms when auxiliary rewards are disabled
    if not args.add_reward:
        self.acc = 0
        self.living = 0
def play(self):
    env = gym_tetris.make('TetrisA-v0')
    env = JoypadSpace(env, MOVEMENT)
    state = env.reset()
    model = self.global_model
    model_path = os.path.join(self.save_dir, 'model_{}.h5'.format('Tetris'))
    print('Loading model from: {}'.format(model_path))
    model.load_weights(model_path)
    done = False
    step_counter = 0
    reward_sum = 0
    pieza_colocada = True  # the current piece has been placed; plan the next one
    informacion = env.get_info()
    antiguo_statistics = informacion['statistics']
    state = [0, 0, 0, 0]
    while not done:
        env.render()
        if pieza_colocada:
            pieza_colocada = False
            pos = 5
            giro = 0
            u = -1
            state = [state]
            policy, value = model(
                tf.convert_to_tensor(state, dtype=tf.float32))
            policy = tf.nn.softmax(policy)
            action = np.argmax(policy)
            # decode the flat action index into a target column and rotation
            pos_objetivo = action % 10
            giro_objetivo = action // 10
        # the original condition `(giro % giro_objetivo) != 0` raises
        # ZeroDivisionError when giro_objetivo == 0 and, since giro starts at
        # 0, never rotates otherwise; rotate until the target is reached
        if giro < giro_objetivo and not done:
            state, reward, done, info = env.step(1)
            accion = 0
            giro = giro + 1
        elif pos > pos_objetivo and not done:
            state, reward, done, info = env.step(6)
            pos = pos - 1
            accion = 0
        elif pos < pos_objetivo and not done:
            state, reward, done, info = env.step(3)
            pos = pos + 1
            accion = 0
        elif not done and not pieza_colocada:
            state, reward, done, info = env.step(9)
            accion = 9
        else:
            accion = 0
        if not done:
            state, reward, done, info = env.step(accion)
            env.render()
        informacion = env.get_info()
        if antiguo_statistics != informacion['statistics']:
            antiguo_statistics = informacion['statistics']
        step_counter += 1
    env.close()
def __init__(self):
    self.save_dir = '/'
    env = gym_tetris.make('TetrisA-v0')
    self.state_size = 4
    self.action_size = 40  # 10 target columns x 4 rotations
    self.opt = tf.train.AdamOptimizer(0.001, use_locking=True)
    print(self.state_size, self.action_size)
    self.global_model = ActorCriticModel(self.state_size, self.action_size)  # global network
    # build the model's weights by pushing a dummy batch through it
    self.global_model(tf.convert_to_tensor(
        np.random.random((1, self.state_size)), dtype=tf.float32))
def work(self):
    self.env = gym_tetris.make('TetrisA-v0')
    self.env = JoypadSpace(self.env, SIMPLE_MOVEMENT)
    self.env.reset()
    action = np.argmax(self.env.action_space.sample())
    ob, _, _, _ = self.env.step(int(action))
    inx = 10
    iny = 20
    done = False
    # net = neat.nn.FeedForwardNetwork.create(self.genome, self.config)
    net = neat.nn.recurrent.RecurrentNetwork.create(self.genome, self.config)
    fitness = 0
    xpos = 0
    xpos_max = 16
    counter = 0
    max_score = 0
    moving = 0
    frames = 0
    while not done:
        scaledimg = cv2.cvtColor(ob, cv2.COLOR_BGR2RGB)
        ob = Minimize(ob)
        ob = cv2.resize(ob, (inx, iny))  # downsample to the 10x20 board grid
        cv2.imshow('humanwin', scaledimg)
        cv2.waitKey(1)
        imgarray = np.ndarray.flatten(ob)
        actions = net.activate(imgarray)
        action = np.argmax(actions)
        ob, rew, done, info = self.env.step(int(action))
        frames += 1
        # survival bonus: one fitness point per 1200 frames alive
        if frames == 1200:
            fitness += 1
            frames = 0
            print(
                f"genome:{self.genome.key} Fitness: {fitness} lines: {info['number_of_lines']}"
            )
    return int(fitness)
def main(): print("Creating environment...") environment = gym_tetris.make('Tetris-v0') print("Creating model...") model = modelutils.create_model(number_of_actions) model.summary() print("Creating agent...") if agent_type == "dqn": agent = DQNAgent( name="tetris-dqn", environment=environment, model=model, observation_transformation=utils.resize_and_bgr2gray, observation_frames=4, number_of_iterations=1000000, gamma=0.95, final_epsilon=0.01, initial_epsilon=1.0, replay_memory_size=2000, minibatch_size=32 ) elif agent_type == "ddqn": agent = DDQNAgent( name="tetris-ddqn", environment=environment, model=model, observation_transformation=utils.resize_and_bgr2gray, observation_frames=4, number_of_iterations=1000000, gamma=0.95, final_epsilon=0.01, initial_epsilon=1.0, replay_memory_size=2000, minibatch_size=32, model_copy_interval=100 ) agent.enable_rewards_tracking(rewards_running_means_length=10000) agent.enable_episodes_tracking(episodes_running_means_length=100) agent.enable_maxq_tracking(maxq_running_means_length=10000) agent.enable_model_saving(model_save_frequency=10000) agent.enable_plots_saving(plots_save_frequency=10000) print("Training ...") agent.fit(verbose=True, headless="headless" in sys.argv, render_states=True)
def main(): print("Loading model...") if len(sys.argv) == 1: model_paths = glob.glob(os.path.join("pretrained_models", "*.h5")) model_paths = [model_path for model_path in model_paths if "tetris" in model_path] assert len(model_paths) != 0, "Did not find any models." model_paths = sorted(model_paths) model_path = model_paths[-1] else: model_path = sys.argv[1] model = models.load_model(model_path) print(model_path, "loaded.") print("Creating environment...") environment = gym_tetris.make('Tetris-v0') print("Running ...") run(model, environment, verbose="verbose" in sys.argv)
def make_env(cfg):
    """Helper function to create the environment (the dm_control/dmc2gym
    path below is commented out; this version builds gym_tetris)."""
    if cfg.env == 'ball_in_cup_catch':
        domain_name = 'ball_in_cup'
        task_name = 'catch'
    elif cfg.env == 'point_mass_easy':
        domain_name = 'point_mass'
        task_name = 'easy'
    else:
        domain_name = cfg.env.split('_')[0]
        task_name = '_'.join(cfg.env.split('_')[1:])
    # per dreamer: https://github.com/danijar/dreamer/blob/02f0210f5991c7710826ca7881f19c64a012290c/wrappers.py#L26
    camera_id = 2 if domain_name == 'quadruped' else 0

    # env = dmc2gym.make(domain_name=domain_name,
    #                    task_name=task_name,
    #                    seed=cfg.seed,
    #                    visualize_reward=False,
    #                    from_pixels=True,
    #                    height=cfg.image_size,
    #                    width=cfg.image_size,
    #                    frame_skip=cfg.action_repeat,
    #                    camera_id=camera_id)
    # env = gym.make("CarRacing-v0")
    env_ = gym_tetris.make('TetrisA-v0')
    env = JoypadSpace(env_, SIMPLE_MOVEMENT)
    # env = MaxAndSkipEnv(env)
    # env._max_episode_steps = env_._max_episode_steps
    max_episode_steps = 10000
    env = WrapPyTorch(env, max_episode_steps)
    env.seed(cfg.seed)
    obs = env.reset()
    print(obs.shape)
    env = utils.FrameStack(env, k=cfg.frame_stack)
    print("Init env done")
    # assert env.action_space.low.min() >= -1
    # assert env.action_space.high.max() <= 1
    return env
def rank(self):
    for model in self.models:
        env = gym_tetris.make('TetrisA-v0')
        env = JoypadSpace(env, SIMPLE_MOVEMENT)
        env.reset()
        done = False
        info = None
        while not done:
            # Generate all options: drop in place, or shift up to four cells
            # left/right before dropping
            options = [[Action.DROP]]
            for x in range(1, 5):
                left_option = [Action.LEFT] * x
                left_option.append(Action.DROP)
                options.append(left_option)
                right_option = [Action.RIGHT] * x
                right_option.append(Action.DROP)
                options.append(right_option)
            # Enumerate all choices
            boards = []
            for option in options:
                # Back up the environment first
                env.unwrapped._backup()
                # Run the sequence of actions
                state = None
                for action in option:
                    state, _, _, _ = env.step(action.value)
                # Now, parse the board from the state
                board = parse_blocks(state)
                boards.append(board)
                env.unwrapped._restore()
            # Choose the best option genetically
            choice = model.best(boards)
            for action in options[choice]:
                _, _, done, info = env.step(action.value)
        model.fitness = info['score']
    self.models = sorted(self.models, key=lambda model: model.fitness)
def __init__(self, env, symbolic, seed, max_episode_length, action_repeat, bit_depth, args):
    self.symbolic = symbolic
    # self._env = gym.make(env)
    self._env = gym_tetris.make(env, skip_level=True)
    self._env.seed(seed)
    self._env = JoypadSpace(self._env, SIMPLE_MOVEMENT)
    if symbolic:
        self._env = SymbolTetris(self._env)
    self.max_episode_length = max_episode_length
    self.action_repeat = 1  # fixed to 1 here; the action_repeat argument is unused
    self.bit_depth = bit_depth
    self.typeb = "1" in env  # "-v1" env ids use the smaller shaping scales below
    self.acc = 0.01 if self.typeb else 3
    self.living = 0.01 if self.typeb else 0.3
    self.die = -50
    self.score = 0.0
    self.add_reward = args.add_reward
    # zero out the reward shaping terms when auxiliary rewards are disabled
    if not args.add_reward:
        self.acc = 0
        self.living = 0
        self.die = 0
def __init__(self, state_size, action_size, global_model, opt, result_queue,
             idx, game_name='Tetris', save_dir='/tmp'):
    super(Worker, self).__init__()
    self.state_size = state_size
    self.action_size = action_size
    self.result_queue = result_queue
    self.global_model = global_model
    self.opt = opt
    self.local_model = ActorCriticModel(self.state_size, self.action_size)
    self.worker_idx = idx
    self.env = gym_tetris.make('TetrisA-v0')
    self.env = JoypadSpace(self.env, MOVEMENT)
    self.save_dir = save_dir
    self.ep_loss = 0.0
    self.game_name = 'Tetris'
def __init__(self, env_id, seed, max_episode_length=1000):
    super(GameEnv, self).__init__()
    extra_args = ENV_GAMES_ARGS.get(env_id, {})
    self.env_id = env_id
    if env_id == "TetrisA-v2":
        self._env = JoypadSpace(gym_tetris.make(env_id, **extra_args), SIMPLE_MOVEMENT)
    elif "ple" in env_id:
        self._env = gym_ple.make(env_id, **extra_args)
    elif "MiniGrid" in env_id:
        # self._env = AbsoluteActionGrid(FullyObsWrapper(gym.make(env_id)))
        self._env = AbsoluteActionGrid(RGBImgObsWrapper(gym.make(env_id)))
    elif "Sokoban" in env_id:
        self._env = TinySokoban(gym.make(env_id, **extra_args))
    elif "MazeEnv" in env_id:
        self._env = MazeEnvImage(mazenv.Env(mazenv.prim((8, 8))), randomize=True)
    else:
        self._env = gym.make(env_id, **extra_args)
    self._env.seed(seed)
    self.action_repeat = GAME_ENVS_ACTION_REPEATS.get(env_id, 1)
    self.max_episode_length = max_episode_length * self.action_repeat
    self.t = 0
def _thunk():
    env = gym_tetris.make(env_id)
    is_atari = hasattr(gym.envs, 'atari') and isinstance(
        env.unwrapped, gym.envs.atari.atari_env.AtariEnv)
    if is_atari:
        env = make_atari(env_id)
    env.seed(seed + rank)

    obs_shape = env.observation_space.shape
    if is_atari:
        if len(env.observation_space.shape) == 3:
            env = wrap_deepmind(env)
    elif len(env.observation_space.shape) == 3:
        env = JoypadSpace(env, SIMPLE_MOVEMENT)

    # If the input has shape (W,H,3), wrap for PyTorch convolutions
    obs_shape = env.observation_space.shape
    if len(obs_shape) == 3 and obs_shape[2] in [1, 3]:
        env = TransposeImage(env)
    return env
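# A sketch of how this thunk is typically wrapped and consumed (an assumption:
# `_thunk` closes over env_id, seed and rank from an enclosing factory, and the
# vectorized-env class here is illustrative, e.g. baselines' SubprocVecEnv):
#
# def make_env(env_id, seed, rank):
#     def _thunk():
#         ...  # body as above
#     return _thunk
#
# envs = SubprocVecEnv([make_env('TetrisA-v0', 0, i) for i in range(8)])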
from nes_py.wrappers import JoypadSpace
import gym_tetris
from gym_tetris.actions import SIMPLE_MOVEMENT
import time
import numpy as np
import matplotlib.pyplot as plt
import cv2
import keyboard

win_name = 'Tetris'
env = gym_tetris.make('TetrisA-v0')
env = JoypadSpace(env, SIMPLE_MOVEMENT)
done = True


def get_keyboard_action():
    """Poll the keyboard and map keys to SIMPLE_MOVEMENT indices."""
    while True:
        # x = '0123456789'
        # x = [i for i in x if keyboard.is_pressed(i)]
        # x = [*x, '0']
        # return int(x[0])
        if keyboard.is_pressed('a'):
            return 1
        if keyboard.is_pressed('d'):
            return 2
        return 0
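# A minimal driver loop for human play (an illustrative addition, assuming the
# standard gym reset-on-done convention; the frame budget is arbitrary):
for _ in range(5000):
    if done:
        state = env.reset()
    state, reward, done, info = env.step(get_keyboard_action())
    env.render()
env.close()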
import numpy as np
import tensorflow as tf
import time
import matplotlib.pyplot as plt
from nes_py.wrappers import JoypadSpace
import gym_tetris
import gym
from gym_tetris.actions import SIMPLE_MOVEMENT

env = gym_tetris.make("TetrisA-v2")
env = JoypadSpace(env, SIMPLE_MOVEMENT)

# action size is NES simple input for tetris
# state size is (240, 256, 3)
num_actions = len(SIMPLE_MOVEMENT)
state_size = 214

episode = 0
running = True
isTrained = False
state = env.reset()


def getBoard(state):
    state = state[48:208, 96:176]  # crop to the Tetris board only (not the whole screen)
    state = np.mean(state, -1)  # average the RGB channels to grayscale
    board = np.zeros((20, 10))
    # the cropped board is 160x80 pixels, i.e. 8x8 pixels per cell; sample the
    # centre pixel of each cell (loop body reconstructed as an assumption --
    # the original snippet is truncated here, and the non-black threshold is a
    # guess)
    for line in range(4, len(state), 8):
        for col in range(4, state.shape[1], 8):
            board[line // 8, col // 8] = 1 if state[line, col] > 0 else 0
    return board
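# Example use of getBoard (an illustrative addition): convert the current
# frame into the 20x10 occupancy grid.
board = getBoard(state)
print(int(board.sum()), "cells occupied")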
def env_creator(env_config):
    env = gym_tetris.make(version)
    env = JoypadSpace(env, MOVEMENT)
    env = WarpFrame(env, dim=84)  # grayscale and resize frames to 84x84
    return env
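# A sketch of registering this creator with RLlib (assumption: `version` and
# `MOVEMENT` are defined at module scope, e.g. 'TetrisA-v0' and a gym_tetris
# action list); the env can then be referenced by name in a trainer config,
# e.g. {"env": "tetris"}.
from ray.tune.registry import register_env
register_env("tetris", env_creator)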
def dqn():
    env = gym_tetris.make('TetrisA-v2')
    env = JoypadSpace(env, MOVEMENT)
    # NOTE: get_state_size/get_next_states/play below are not part of the
    # gym_tetris/JoypadSpace API; they assume a custom Tetris env that exposes
    # grouped (column, rotation) actions and per-placement next states.

    episodes = 2000
    max_steps = None
    epsilon_stop_episode = 1500
    mem_size = 20000
    discount = 0.95
    batch_size = 512
    epochs = 1
    render_every = 50
    log_every = 50
    replay_start_size = 2000
    train_every = 1
    n_neurons = [32, 32]
    render_delay = None
    activations = ['relu', 'relu', 'linear']

    agent = DQNAgent(env.get_state_size(),
                     n_neurons=n_neurons, activations=activations,
                     epsilon_stop_episode=epsilon_stop_episode, mem_size=mem_size,
                     discount=discount, replay_start_size=replay_start_size)

    log_dir = f'logs/tetris-nn={str(n_neurons)}-mem={mem_size}-bs={batch_size}-e={epochs}-{datetime.now().strftime("%Y%m%d-%H%M%S")}'
    log = CustomTensorBoard(log_dir=log_dir)

    scores = []

    for episode in tqdm(range(episodes)):
        current_state = env.reset()
        done = False
        steps = 0

        if render_every and episode % render_every == 0:
            render = True
        else:
            render = False

        # Game
        while not done and (not max_steps or steps < max_steps):
            next_states = env.get_next_states()
            best_state = agent.best_state(next_states.values())

            best_action = None
            for action, state in next_states.items():
                if state == best_state:
                    best_action = action
                    break

            reward, done = env.play(best_action[0], best_action[1],
                                    render=render, render_delay=render_delay)

            agent.add_to_memory(current_state, next_states[best_action], reward, done)
            current_state = next_states[best_action]
            steps += 1

        scores.append(env.get_game_score())

        # Train
        if episode % train_every == 0:
            agent.train(batch_size=batch_size, epochs=epochs)

        # Logs
        if log_every and episode and episode % log_every == 0:
            avg_score = mean(scores[-log_every:])
            min_score = min(scores[-log_every:])
            max_score = max(scores[-log_every:])
            log.log(episode, avg_score=avg_score, min_score=min_score,
                    max_score=max_score)