import argparse
import random

import numpy

from minatar import Environment


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--game", "-g", type=str)
    parser.add_argument("--output", "-o", type=str)
    parser.add_argument("--verbose", "-v", action="store_true")
    parser.add_argument("--loadfile", "-l", type=str)
    parser.add_argument("--save", "-s", action="store_true")
    parser.add_argument("--replayoff", "-r", action="store_true")
    parser.add_argument("--targetoff", "-t", action="store_true")
    parser.add_argument("--ramp-difficulty", default=False, action="store_true")
    parser.add_argument("--sticky-actions", default=False, action="store_true")
    parser.add_argument("--save-dataset", default=False, action="store_true")
    parser.add_argument("--num-frames", type=int, default=5000000)
    args = parser.parse_args()

    env = Environment(args.game,
                      sticky_action_prob=0.1 if args.sticky_actions else 0.0,
                      difficulty_ramping=args.ramp_difficulty)

    # Run a uniform-random policy until num_episodes episodes have terminated,
    # then report the mean undiscounted return.
    num_episodes = 100
    num_actions = env.num_actions()
    reward_per_episode = []
    episode_rewards = []
    env.reset()
    for i in range(10000000):
        s = env.state()  # queried for illustration; unused by the random agent
        action = random.randrange(num_actions)
        reward, terminated = env.act(action)
        episode_rewards.append(reward)
        if terminated:
            reward_per_episode.append(numpy.sum(episode_rewards))
            episode_rewards = []
            if len(reward_per_episode) == num_episodes:
                break
            env.reset()
    print(numpy.mean(reward_per_episode))


if __name__ == '__main__':
    main()
import gym
import numpy as np
from gym import spaces

from minatar import Environment


class BaseEnv(gym.Env):
    metadata = {'render.modes': ['human', 'rgb_array']}

    def __init__(self, display_time=50, **kwargs):
        self.game_name = 'Game Name'
        self.display_time = display_time
        self.init(**kwargs)

    def init(self, **kwargs):
        # Wrap the underlying MinAtar Environment and expose gym-style spaces.
        self.game = Environment(env_name=self.game_name, **kwargs)
        self.action_set = self.game.env.action_map
        self.action_space = spaces.Discrete(self.game.num_actions())
        self.observation_space = spaces.Box(0.0, 1.0, shape=self.game.state_shape(),
                                            dtype=np.float32)

    def step(self, action):
        reward, done = self.game.act(action)
        return (self.game.state(), reward, done, {})

    def reset(self):
        self.game.reset()
        return self.game.state()

    def seed(self, seed=None):
        # Re-create the underlying environment with the requested seed.
        self.game = Environment(env_name=self.game_name, random_seed=seed)
        return seed

    def render(self, mode='human'):
        if mode == 'rgb_array':
            return self.game.state()
        elif mode == 'human':
            self.game.display_state(self.display_time)

    def close(self):
        if self.game.visualized:
            self.game.close_display()
        return 0
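# A minimal usage sketch (not part of the wrapper above; the class name BreakoutEnv
# is an assumption for illustration): a concrete game can be exposed by subclassing
# BaseEnv, setting game_name to one of the MinAtar game names, and calling init().
class BreakoutEnv(BaseEnv):
    def __init__(self, display_time=50, **kwargs):
        self.game_name = 'breakout'
        self.display_time = display_time
        self.init(**kwargs)


if __name__ == '__main__':
    env = BreakoutEnv()
    obs = env.reset()
    obs, reward, done, info = env.step(env.action_space.sample())
    env.close()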
# python3 random_play.py -g <game>
#
################################################################################################################
import random, numpy, argparse

from minatar import Environment

NUM_EPISODES = 1000

parser = argparse.ArgumentParser()
parser.add_argument("--game", "-g", type=str)
args = parser.parse_args()

env = Environment(args.game)

e = 0
returns = []
num_actions = env.num_actions()

# Run NUM_EPISODES episodes and log all returns
while e < NUM_EPISODES:
    # Initialize the return for every episode
    G = 0.0

    # Initialize the environment
    env.reset()
    terminated = False

    # Obtain first state, unused by random agent, but included for illustration
    s = env.state()

    while not terminated:
        # Select an action uniformly at random
        action = random.randrange(num_actions)

        # Act and observe the reward and termination signal
        reward, terminated = env.act(action)

        # Accumulate the undiscounted return for this episode
        G += reward

    # Increment the episode counter and store the return
    e += 1
    returns.append(G)

print("Average return over " + str(NUM_EPISODES) + " episodes: " + str(numpy.mean(returns)))
# Note: this main() is excerpted from a larger training script; it assumes module-level
# imports (argparse, logging, os, pickle, random, numpy, torch, minatar.Environment) and
# definitions of dqn, get_state, Transition, device, STEP_SIZE and NUM_FRAMES elsewhere
# in the same file.
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--game", "-g", type=str)
    parser.add_argument("--output", "-o", type=str)
    parser.add_argument("--verbose", "-v", action="store_true")
    parser.add_argument("--loadfile", "-l", type=str)
    parser.add_argument("--alpha", "-a", type=float, default=STEP_SIZE)
    parser.add_argument("--save", "-s", action="store_true")
    parser.add_argument("--replayoff", "-r", action="store_true")
    parser.add_argument("--targetoff", "-t", action="store_true")
    parser.add_argument("--ramp-difficulty", default=False, action="store_true")
    parser.add_argument("--sticky-actions", default=False, action="store_true")
    parser.add_argument("--save-dataset", default=False, action="store_true")
    parser.add_argument("--num-frames", type=int, default=5000000)
    args = parser.parse_args()

    global NUM_FRAMES
    NUM_FRAMES = args.num_frames

    if args.verbose:
        logging.basicConfig(level=logging.INFO)

    # If there's an output specified, then use the user-specified output. Otherwise,
    # create a file in the current directory with the game's name.
    if args.output:
        file_name = args.output
    else:
        file_name = os.getcwd() + "/" + args.game

    load_file_path = None
    if args.loadfile:
        load_file_path = args.loadfile

    env = Environment(args.game,
                      sticky_action_prob=0.1 if args.sticky_actions else 0.0,
                      difficulty_ramping=args.ramp_difficulty)

    print('Cuda available?: ' + str(torch.cuda.is_available()))

    # Train (or load) the DQN policy network.
    policy_net = dqn(env, args.replayoff, args.targetoff, file_name, args.save,
                     load_file_path, args.alpha)

    if args.save_dataset:
        # Collect transitions with an epsilon-greedy policy over the trained network
        # and pickle them for later offline use.
        epsilon = 0.1
        num_steps = 100000
        num_actions = env.num_actions()
        transitions = []
        env.reset()
        for i in range(num_steps):
            if i % 1000 == 0:
                logging.info("data collection step {:d}".format(i))
            s = env.state()
            s_t = get_state(s)
            with torch.no_grad():
                q_values = policy_net(s_t)
            if numpy.random.uniform(0, 1) < epsilon:
                action = torch.tensor([[random.randrange(num_actions)]], device=device)
            else:
                action = q_values.max(1)[1].view(1, 1)
            reward, terminated = env.act(action)
            s_prime = env.state()
            s_prime_t = get_state(s_prime)
            with torch.no_grad():
                q_values_prime = policy_net(s_prime_t)
            t = Transition(s, int(action.cpu().numpy()[0, 0]), float(reward), s_prime,
                           False, bool(terminated),
                           q_values=q_values.cpu().numpy(),
                           next_q_values=q_values_prime.cpu().numpy())
            transitions.append(t)
            if terminated:
                env.reset()
        file_name = os.path.join("dataset", "{:s}.pickle".format(args.game))
        with open(file_name, "wb") as file:
            pickle.dump(transitions, file)
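# A minimal loading sketch (assumed usage, not part of the script above; "breakout" is
# just an example game name): the pickle written under --save-dataset contains a list of
# Transition records. Unpickling requires the Transition class to be importable under the
# same module path it was pickled from.
import os
import pickle

with open(os.path.join("dataset", "breakout.pickle"), "rb") as f:
    transitions = pickle.load(f)

print("collected transitions:", len(transitions))
print(transitions[0])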