def __init__(self, args):
    """
    Creates an object from which new environments can be created
    :param args: parsed command-line arguments
    """
    if args.game.lower() == 'tetris':
        from tetris_emulator import TetrisEmulator
        self.num_actions = 5
        self.create_environment = lambda i: TetrisEmulator(i, args)
    elif args.game in GYM_GAMES:  # GYM_GAMES is assumed to be a module-level list of supported gym ids
        from gym_emulator import GymEmulator
        import gym
        self.create_environment = lambda i: GymEmulator(i, args)
        env_test = gym.make(args.game)
        self.num_actions = env_test.action_space.n
    else:
        from atari_emulator import AtariEmulator
        from ale_python_interface import ALEInterface
        filename = args.rom_path + "/" + args.game + ".bin"
        ale_int = ALEInterface()
        ale_int.loadROM(str.encode(filename))
        self.num_actions = len(ale_int.getMinimalActionSet())
        self.create_environment = lambda i: AtariEmulator(i, args)
def __init__(self, args):
    """
    Creates an object from which new environments can be created
    :param args: parsed command-line arguments
    """
    from atari_emulator import AtariEmulator
    from ale_py import ALEInterface
    filename = args.rom_path + "/" + args.game + ".bin"
    ale_int = ALEInterface()
    ale_int.loadROM(str.encode(filename))
    self.num_actions = len(ale_int.getMinimalActionSet())
    self.create_environment = lambda i: AtariEmulator(i, args)
def __init__(self, args):
    """
    Creates an object from which new environments can be created
    :param args: parsed command-line arguments
    """
    if args.experiment_type == 'atari':
        from atari_emulator import AtariEmulator
        from ale_python_interface import ALEInterface
        filename = args.rom_path + "/" + args.game + ".bin"
        ale_int = ALEInterface()
        ale_int.loadROM(str.encode(filename))
        self.num_actions = len(ale_int.getMinimalActionSet())
        self.state_shape = (84, 84, 4)
        self.create_environment = lambda i: AtariEmulator(i, args)
    elif args.experiment_type == 'corridor':
        # CorridorEnv, ComplexActionSetCorridorEnv, and GymEnvironment are
        # assumed to be imported at module level from the project's
        # environment package.
        corridor_envs = {
            'FrozenLake-v0': None,
            'FrozenLakeNonskid4x4-v0': None,
            'FrozenLakeNonskid8x8-v0': None,
            'CorridorSmall-v1': CorridorEnv,
            'CorridorSmall-v2': CorridorEnv,
            'CorridorActionTest-v0': CorridorEnv,
            'CorridorActionTest-v1': ComplexActionSetCorridorEnv,
            'CorridorBig-v0': CorridorEnv,
            'CorridorFLNonSkid-v1': CorridorEnv
        }
        corridor_game_id = args.game
        corridor_class = corridor_envs[args.game]
        env = GymEnvironment(-1, corridor_game_id, args.random_seed,
                             env_class=corridor_class)
        self.num_actions = env.num_actions
        self.state_shape = tuple(env.shape)
        del env
        self.create_environment = lambda i: GymEnvironment(
            i, corridor_game_id, args.random_seed, env_class=corridor_class)
    else:
        import gym
        env = gym.make(args.game)
        s = env.reset()
        if isinstance(s, (list, tuple)):
            self.state_shape = (len(s[0]) + len(s[1]),)
        else:
            self.state_shape = list(env.observation_space.shape)
        self.num_actions = env.action_space.n
        del env
        self.create_environment = lambda i: gym.make(args.game)
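# A minimal usage sketch for the factory variant above, assuming the __init__
# belongs to a class named EnvironmentCreator (the enclosing class name is not
# shown) and that args carries the fields each branch reads:
from argparse import Namespace

args = Namespace(experiment_type='gym', game='CartPole-v1')
creator = EnvironmentCreator(args)
print(creator.num_actions, creator.state_shape)

# One emulator per worker, indexed by worker id.
envs = [creator.create_environment(i) for i in range(4)]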
def main():
    parser = argparse.ArgumentParser('a program to train or run a deep q-learning agent')
    parser.add_argument("game", type=str, help="name of game to play")
    parser.add_argument("agent_type", type=str, help="name of learning/acting technique used")
    parser.add_argument("agent_name", type=str, help="unique name of this agent instance")
    parser.add_argument("--rom_path", type=str, help="path to directory containing atari game roms", default='../roms')
    parser.add_argument("--watch",
        help="if true, a pretrained model with the specified name is loaded and tested with the game screen displayed",
        action='store_true')
    parser.add_argument("--epochs", type=int, help="number of epochs", default=200)
    parser.add_argument("--epoch_length", type=int, help="number of steps in an epoch", default=250000)
    parser.add_argument("--test_steps", type=int, help="max number of steps per test", default=125000)
    parser.add_argument("--test_steps_hardcap", type=int, help="absolute max number of steps per test", default=135000)
    parser.add_argument("--test_episodes", type=int, help="max number of episodes per test", default=30)
    parser.add_argument("--history_length", type=int, help="number of frames in a state", default=4)
    parser.add_argument("--training_frequency", type=int, help="number of steps run before training", default=4)
    parser.add_argument("--random_exploration_length", type=int,
        help="number of randomly-generated experiences to initially fill experience memory", default=50000)
    parser.add_argument("--initial_exploration_rate", type=float, help="initial exploration rate", default=1.0)
    parser.add_argument("--final_exploration_rate", type=float, help="final exploration rate from linear annealing", default=0.1)
    parser.add_argument("--final_exploration_frame", type=int,
        help="frame at which the final exploration rate is reached", default=1000000)
    parser.add_argument("--test_exploration_rate", type=float, help="exploration rate while testing", default=0.05)
    parser.add_argument("--frame_skip", type=int, help="number of frames to repeat chosen action", default=4)
    parser.add_argument("--screen_dims", type=tuple, help="dimensions to resize frames", default=(84, 84))
    # used for stochasticity and to help prevent overfitting.
    # Must be greater than frame_skip * (observation_length - 1) + buffer_length - 1
    parser.add_argument("--max_start_wait", type=int, help="max number of frames to wait for initial state", default=60)
    # buffer_length = 1 prevents blending
    parser.add_argument("--buffer_length", type=int, help="length of buffer to blend frames", default=2)
    parser.add_argument("--blend_method", type=str, help="method used to blend frames", choices=('max',), default='max')
    parser.add_argument("--reward_processing", type=str, help="method to process rewards", choices=('clip', 'none'), default='clip')
    # must set network_architecture to custom in order to use a custom architecture
    parser.add_argument("--conv_kernel_shapes", type=tuple,
        help="shapes of convnet kernels: ((height, width, in_channels, out_channels), (next layer))")
    # must have same length as conv_kernel_shapes
    parser.add_argument("--conv_strides", type=tuple, help="convnet strides: ((1, height, width, 1), (next layer))")
    # currently, you must have at least one dense layer
    parser.add_argument("--dense_layer_shapes", type=tuple, help="shapes of dense layers: ((in_size, out_size), (next layer))")
    parser.add_argument("--discount_factor", type=float, help="constant to discount future rewards", default=0.99)
    parser.add_argument("--learning_rate", type=float, help="constant to scale parameter updates", default=0.00025)
    parser.add_argument("--optimizer", type=str, help="optimization method for network",
        choices=('rmsprop', 'graves_rmsprop'), default='rmsprop')
    parser.add_argument("--rmsprop_decay", type=float, help="decay constant for moving average in rmsprop", default=0.95)
    parser.add_argument("--rmsprop_epsilon", type=float, help="constant to stabilize rmsprop", default=0.01)
    # set error_clipping to less than 0 to disable
    parser.add_argument("--error_clipping", type=float,
        help="constant at which td-error becomes linear instead of quadratic", default=1.0)
    # set gradient clipping to 0 or less to disable. Currently only works with graves_rmsprop.
    parser.add_argument("--gradient_clip", type=float, help="clip gradients to have the provided L2-norm", default=0)
    parser.add_argument("--target_update_frequency", type=int, help="number of steps between target network updates", default=10000)
    parser.add_argument("--memory_capacity", type=int, help="max number of experiences to store in experience memory", default=1000000)
    parser.add_argument("--batch_size", type=int, help="number of transitions sampled from memory during learning", default=32)
    # must set to custom in order to specify a custom architecture
    parser.add_argument("--network_architecture", type=str, help="name of prespecified network architecture",
        choices=("deepmind_nips", "deepmind_nature", "custom"), default="deepmind_nature")
    parser.add_argument("--recording_frequency", type=int, help="number of steps before tensorboard recording", default=50000)
    parser.add_argument("--saving_threshold", type=int, help="min score threshold for saving model", default=0)
    parser.add_argument("--parallel", help="parallelize acting and learning", action='store_true')
    parser.add_argument("--double_dqn", help="use double q-learning algorithm in error target calculation", action='store_true')
    args = parser.parse_args()

    if args.network_architecture == 'deepmind_nature':
        args.conv_kernel_shapes = [[8, 8, 4, 32], [4, 4, 32, 64], [3, 3, 64, 64]]
        args.conv_strides = [[1, 4, 4, 1], [1, 2, 2, 1], [1, 1, 1, 1]]
        args.dense_layer_shapes = [[3136, 512]]
    elif args.network_architecture == 'deepmind_nips':
        args.conv_kernel_shapes = [[8, 8, 4, 16], [4, 4, 16, 32]]
        args.conv_strides = [[1, 4, 4, 1], [1, 2, 2, 1]]
        args.dense_layer_shapes = [[2592, 256]]

    if not args.watch:
        train_stats = RecordStats(args, False)
        test_stats = RecordStats(args, True)
        training_emulator = AtariEmulator(args)
        testing_emulator = AtariEmulator(args)
        num_actions = len(training_emulator.get_possible_actions())
        experience_memory = ExperienceMemory(args, num_actions)
        if args.parallel:
            q_network = ParallelQNetwork(args, num_actions)
            agent = ParallelDQNAgent(args, q_network, training_emulator, experience_memory, num_actions, train_stats)
        else:
            q_network = QNetwork(args, num_actions)
            agent = DQNAgent(args, q_network, training_emulator, experience_memory, num_actions, train_stats)
        experiment.run_experiment(args, agent, testing_emulator, test_stats)
    else:
        testing_emulator = AtariEmulator(args)
        num_actions = len(testing_emulator.get_possible_actions())
        q_network = QNetwork(args, num_actions)
        agent = DQNAgent(args, q_network, None, None, num_actions, None)
        experiment.evaluate_agent(args, agent, testing_emulator, None)
def create_environment():
    ale_int = ALEInterface()
    ale_int.loadROM(str.encode(BIN))
    num_actions = len(ale_int.getMinimalActionSet())
    return AtariEmulator(BIN), num_actions
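# Usage sketch for the factory above; BIN is assumed to be a module-level
# constant holding the path to an Atari ROM:
BIN = "../roms/breakout.bin"

emulator, num_actions = create_environment()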
if __name__ == "__main__":
    args = get_arg_parser().parse_args()
    from atari_emulator import AtariEmulator
    from ale_python_interface import ALEInterface

    filename = args.rom_path + "/" + args.game + ".bin"
    ale_int = ALEInterface()
    ale_int.loadROM(str.encode(filename))
    num_actions = len(ale_int.getMinimalActionSet())
    args.num_actions = num_actions
    args.random_seed = 3

    ray.init()
    create_environment = lambda i: AtariEmulator.remote(i, args)
    emulators = np.asarray([create_environment(i) for i in range(4)])
    # Per-emulator buffers: states, rewards, episode-over flags, and actions.
    variables = [
        np.asarray([ray.get(emulator.get_initial_state.remote())
                    for emulator in emulators], dtype=np.uint8),
        np.zeros(4, dtype=np.float32),
        np.asarray([False] * 4, dtype=np.float32),
        np.zeros((4, num_actions), dtype=np.float32)
    ]
    for step in range(10):
        for i, (emulator, action) in enumerate(zip(emulators, variables[-1])):
            new_s, reward, episode_over = ray.get(emulator.next.remote(action))
            if episode_over:
                variables[0][i] = ray.get(emulator.get_initial_state.remote())
            else:
                variables[0][i] = new_s  # assumed completion; the original snippet is truncated here
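# The driver above calls AtariEmulator.remote(...), which requires the class
# to be declared as a Ray actor. A minimal sketch, with method signatures
# inferred from the calls above (bodies elided):
import ray


@ray.remote
class AtariEmulator:
    def __init__(self, index, args):
        ...  # load the ROM for args.game and seed with args.random_seed

    def get_initial_state(self):
        ...  # return the stacked initial frames as a uint8 array

    def next(self, action):
        ...  # step the emulator; return (new_state, reward, episode_over)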