def __init__(self, args):
        """
        Creates an object from which new environments can be created
        :param args:
        """
        if args.game.lower() == 'tetris':
            from tetris_emulator import TetrisEmulator
            self.num_actions = 5
            self.create_environment = lambda i: TetrisEmulator(i, args)

        elif args.game in GYM_GAMES:
            from gym_emulator import GymEmulator
            import gym
            self.create_environment = lambda i: GymEmulator(i, args)
            env_test = gym.make(args.game)
            self.num_actions = env_test.action_space.n

        else:
            from atari_emulator import AtariEmulator
            from ale_python_interface import ALEInterface
            filename = args.rom_path + "/" + args.game + ".bin"
            ale_int = ALEInterface()
            ale_int.loadROM(str.encode(filename))
            self.num_actions = len(ale_int.getMinimalActionSet())
            self.create_environment = lambda i: AtariEmulator(i, args)
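A minimal usage sketch for the creator above. The class name EnvironmentCreator and the worker count are illustrative assumptions; only num_actions and create_environment come from the snippet itself.

# Hypothetical consumer of the creator object defined above.
creator = EnvironmentCreator(args)   # assumed name for the class this __init__ belongs to

# One emulator per worker, each identified by its index.
emulators = [creator.create_environment(i) for i in range(4)]

# The action count sizes the policy / Q-value output layer downstream.
print("num_actions:", creator.num_actions)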
Example #2
def __init__(self, args):
    """
    Creates an object from which new environments can be created
    :param args:
    """
    from atari_emulator import AtariEmulator
    from ale_py import ALEInterface
    filename = args.rom_path + "/" + args.game + ".bin"
    ale_int = ALEInterface()
    ale_int.loadROM(str.encode(filename))
    self.num_actions = len(ale_int.getMinimalActionSet())
    self.create_environment = lambda i: AtariEmulator(i, args)
Example #3
    def __init__(self, args):
        """
        Creates an object from which new environments can be created
        :param args:
        """
        if args.experiment_type == 'atari':
            from atari_emulator import AtariEmulator
            from ale_python_interface import ALEInterface
            filename = args.rom_path + "/" + args.game + ".bin"
            ale_int = ALEInterface()
            ale_int.loadROM(str.encode(filename))
            self.num_actions = len(ale_int.getMinimalActionSet())
            self.state_shape = (84, 84, 4)
            self.create_environment = lambda i: AtariEmulator(i, args)
        
        elif args.experiment_type == 'corridor':
            corridor_envs = {
                    'FrozenLake-v0': None,
                    'FrozenLakeNonskid4x4-v0': None,
                    'FrozenLakeNonskid8x8-v0': None,
                    'CorridorSmall-v1': CorridorEnv,
                    'CorridorSmall-v2': CorridorEnv,
                    'CorridorActionTest-v0': CorridorEnv,
                    'CorridorActionTest-v1': ComplexActionSetCorridorEnv,
                    'CorridorBig-v0': CorridorEnv,
                    'CorridorFLNonSkid-v1': CorridorEnv
                }
            
            corridor_game_id = args.game
            corridor_class = corridor_envs[args.game]
            env = GymEnvironment(-1, corridor_game_id, args.random_seed, env_class=corridor_class)
            self.num_actions = env.num_actions
            self.state_shape = tuple(env.shape)
            del env
            self.create_environment = lambda i: GymEnvironment(i, corridor_game_id, args.random_seed, env_class=corridor_class)
        else:
            import gym
            env = gym.make(args.game)
            s = env.reset()
            if isinstance(s, (list, tuple)):
                self.state_shape = (len(s[0]) + len(s[1]),)
            else:
                self.state_shape = tuple(env.observation_space.shape)
            self.num_actions = env.action_space.n

            del env
            self.create_environment = lambda i: gym.make(args.game)
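Every branch above exposes the same three attributes (num_actions, state_shape, create_environment), so downstream code can stay backend-agnostic. Below is a sketch of how that interface might be consumed, preallocating a batched state buffer in the style of Example #7; the class name and worker count are assumptions.

import numpy as np

NUM_WORKERS = 4  # illustrative
creator = EnvironmentCreator(args)  # assumed name for the class this __init__ belongs to
envs = [creator.create_environment(i) for i in range(NUM_WORKERS)]

# Batched state buffer shaped by the creator, e.g. (4, 84, 84, 4) for the atari branch.
states = np.zeros((NUM_WORKERS,) + tuple(creator.state_shape), dtype=np.uint8)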
Example #4
def main():

	parser = argparse.ArgumentParser('a program to train or run a deep q-learning agent')
	parser.add_argument("game", type=str, help="name of game to play")
	parser.add_argument("agent_type", type=str, help="name of learning/acting technique used")
	parser.add_argument("agent_name", type=str, help="unique name of this agent instance")
	parser.add_argument("--rom_path", type=str, help="path to directory containing atari game roms", default='../roms')
	parser.add_argument("--watch",
		help="if true, a pretrained model with the specified name is loaded and tested with the game screen displayed", 
		action='store_true')

	parser.add_argument("--epochs", type=int, help="number of epochs", default=200)
	parser.add_argument("--epoch_length", type=int, help="number of steps in an epoch", default=250000)
	parser.add_argument("--test_steps", type=int, help="max number of steps per test", default=125000)
	parser.add_argument("--test_steps_hardcap", type=int, help="absolute max number of steps per test", default=135000)
	parser.add_argument("--test_episodes", type=int, help="max number of episodes per test", default=30)
	parser.add_argument("--history_length", type=int, help="number of frames in a state", default=4)
	parser.add_argument("--training_frequency", type=int, help="number of steps run before training", default=4)
	parser.add_argument("--random_exploration_length", type=int, 
		help="number of randomly-generated experiences to initially fill experience memory", default=50000)
	parser.add_argument("--initial_exploration_rate", type=float, help="initial exploration rate", default=1.0)
	parser.add_argument("--final_exploration_rate", type=float, help="final exploration rate from linear annealing", default=0.1)
	parser.add_argument("--final_exploration_frame", type=int, 
		help="frame at which the final exploration rate is reached", default=1000000)
	parser.add_argument("--test_exploration_rate", type=float, help="exploration rate while testing", default=0.05)
	parser.add_argument("--frame_skip", type=int, help="number of frames to repeat chosen action", default=4)
	parser.add_argument("--screen_dims", type=tuple, help="dimensions to resize frames", default=(84,84))
	# used for stochasticity and to help prevent overfitting.  
	# Must be greater than frame_skip * (observation_length -1) + buffer_length - 1
	parser.add_argument("--max_start_wait", type=int, help="max number of frames to wait for initial state", default=60)
	# buffer_length = 1 prevents blending
	parser.add_argument("--buffer_length", type=int, help="length of buffer to blend frames", default=2)
	parser.add_argument("--blend_method", type=str, help="method used to blend frames", choices=('max'), default='max')
	parser.add_argument("--reward_processing", type=str, help="method to process rewards", choices=('clip', 'none'), default='clip')
	# must set network_architecture to custom in order to use a custom architecture
	parser.add_argument("--conv_kernel_shapes", type=tuple, 
		help="shapes of convnet kernels: ((height, width, in_channels, out_channels), (next layer))")
	# must have same length as conv_kernel_shapes
	parser.add_argument("--conv_strides", type=tuple, help="connvet strides: ((1, height, width, 1), (next layer))")
	# currently,  you must have at least one dense layer
	parser.add_argument("--dense_layer_shapes", type=tuple, help="shapes of dense layers: ((in_size, out_size), (next layer))")
	parser.add_argument("--discount_factor", type=float, help="constant to discount future rewards", default=0.99)
	parser.add_argument("--learning_rate", type=float, help="constant to scale parameter updates", default=0.00025)
	parser.add_argument("--optimizer", type=str, help="optimization method for network", 
		choices=('rmsprop', 'graves_rmsprop'), default='rmsprop')
	parser.add_argument("--rmsprop_decay", type=float, help="decay constant for moving average in rmsprop", default=0.95)
	parser.add_argument("--rmsprop_epsilon", type=int, help="constant to stabilize rmsprop", default=0.01)
	# set error_clipping to less than 0 to disable
	parser.add_argument("--error_clipping", type=str, help="constant at which td-error becomes linear instead of quadratic", default=1.0)
	# set gradient clipping to 0 or less to disable.  Currently only works with graves_rmsprop.
	parser.add_argument("--gradient_clip", type=str, help="clip gradients to have the provided L2-norm", default=0)
	parser.add_argument("--target_update_frequency", type=int, help="number of steps between target network updates", default=10000)
	parser.add_argument("--memory_capacity", type=int, help="max number of experiences to store in experience memory", default=1000000)
	parser.add_argument("--batch_size", type=int, help="number of transitions sampled from memory during learning", default=32)
	# must set to custom in order to specify custom architecture
	parser.add_argument("--network_architecture", type=str, help="name of prespecified network architecture", 
		choices=("deepmind_nips", "deepmind_nature, custom"), default="deepmind_nature")
	parser.add_argument("--recording_frequency", type=int, help="number of steps before tensorboard recording", default=50000)

	parser.add_argument("--saving_threshold", type=int, help="min score threshold for saving model.", default=0)

	parser.add_argument("--parallel", help="parallelize acting and learning", action='store_true')
	parser.add_argument("--double_dqn", help="use double q-learning algorithm in error target calculation", action='store_true')
	args = parser.parse_args()


	if args.network_architecture == 'deepmind_nature':
		args.conv_kernel_shapes = [
			[8,8,4,32],
			[4,4,32,64],
			[3,3,64,64]]
		args.conv_strides = [
			[1,4,4,1],
			[1,2,2,1],
			[1,1,1,1]]
		args.dense_layer_shapes = [[3136, 512]]
	elif args.network_architecture == 'deepmind_nips':
		args.conv_kernel_shapes = [
			[8,8,4,16],
			[4,4,16,32]]
		args.conv_strides = [
			[1,4,4,1],
			[1,2,2,1]]
		args.dense_layer_shapes = [[2592, 256]]

	if not args.watch:
		train_stats = RecordStats(args, False)
		test_stats = RecordStats(args, True)
		training_emulator = AtariEmulator(args)
		testing_emulator = AtariEmulator(args)
		num_actions = len(training_emulator.get_possible_actions())
		experience_memory = ExperienceMemory(args, num_actions)

		q_network = None
		agent = None
		if args.parallel:
			q_network = ParallelQNetwork(args, num_actions)
			agent = ParallelDQNAgent(args, q_network, training_emulator, experience_memory, num_actions, train_stats)
		else:
			q_network = QNetwork(args, num_actions)
			agent = DQNAgent(args, q_network, training_emulator, experience_memory, num_actions, train_stats)

		experiment.run_experiment(args, agent, testing_emulator, test_stats)

	else:
		testing_emulator = AtariEmulator(args)
		num_actions = len(testing_emulator.get_possible_actions())
		q_network = QNetwork(args, num_actions)
		agent = DQNAgent(args, q_network, None, None, num_actions, None)
		experiment.evaluate_agent(args, agent, testing_emulator, None)
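As a sanity check on the presets above: assuming 'valid'-style convolutions on 84x84 inputs (consistent with the original DeepMind networks), the dense-layer input sizes 3136 and 2592 follow directly from the kernel shapes and strides.

def conv_out(size, kernel, stride):
    # spatial output size of a 'valid' convolution along one dimension
    return (size - kernel) // stride + 1

# deepmind_nature: 84 -> 20 -> 9 -> 7 spatially, 64 output channels
s = conv_out(conv_out(conv_out(84, 8, 4), 4, 2), 3, 1)
assert s * s * 64 == 3136

# deepmind_nips: 84 -> 20 -> 9 spatially, 32 output channels
s = conv_out(conv_out(84, 8, 4), 4, 2)
assert s * s * 32 == 2592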
Example #5
def main():

    parser = argparse.ArgumentParser(
        'a program to train or run a deep q-learning agent')
    parser.add_argument("game", type=str, help="name of game to play")
    parser.add_argument("agent_type",
                        type=str,
                        help="name of learning/acting technique used")
    parser.add_argument("agent_name",
                        type=str,
                        help="unique name of this agent instance")
    parser.add_argument("--rom_path",
                        type=str,
                        help="path to directory containing atari game roms",
                        default='../roms')
    parser.add_argument(
        "--watch",
        help=
        "if true, a pretrained model with the specified name is loaded and tested with the game screen displayed",
        action='store_true')

    parser.add_argument("--epochs",
                        type=int,
                        help="number of epochs",
                        default=200)
    parser.add_argument("--epoch_length",
                        type=int,
                        help="number of steps in an epoch",
                        default=250000)
    parser.add_argument("--test_steps",
                        type=int,
                        help="max number of steps per test",
                        default=125000)
    parser.add_argument("--test_steps_hardcap",
                        type=int,
                        help="absolute max number of steps per test",
                        default=135000)
    parser.add_argument("--test_episodes",
                        type=int,
                        help="max number of episodes per test",
                        default=30)
    parser.add_argument("--history_length",
                        type=int,
                        help="number of frames in a state",
                        default=4)
    parser.add_argument("--training_frequency",
                        type=int,
                        help="number of steps run before training",
                        default=4)
    parser.add_argument(
        "--random_exploration_length",
        type=int,
        help=
        "number of randomly-generated experiences to initially fill experience memory",
        default=50000)
    parser.add_argument("--initial_exploration_rate",
                        type=float,
                        help="initial exploration rate",
                        default=1.0)
    parser.add_argument("--final_exploration_rate",
                        type=float,
                        help="final exploration rate from linear annealing",
                        default=0.1)
    parser.add_argument(
        "--final_exploration_frame",
        type=int,
        help="frame at which the final exploration rate is reached",
        default=1000000)
    parser.add_argument("--test_exploration_rate",
                        type=float,
                        help="exploration rate while testing",
                        default=0.05)
    parser.add_argument("--frame_skip",
                        type=int,
                        help="number of frames to repeat chosen action",
                        default=4)
    parser.add_argument("--screen_dims",
                        type=tuple,
                        help="dimensions to resize frames",
                        default=(84, 84))
    # used for stochasticity and to help prevent overfitting.
    # Must be greater than frame_skip * (observation_length -1) + buffer_length - 1
    parser.add_argument("--max_start_wait",
                        type=int,
                        help="max number of frames to wait for initial state",
                        default=60)
    # buffer_length = 1 prevents blending
    parser.add_argument("--buffer_length",
                        type=int,
                        help="length of buffer to blend frames",
                        default=2)
    parser.add_argument("--blend_method",
                        type=str,
                        help="method used to blend frames",
                        choices=('max',),
                        default='max')
    parser.add_argument("--reward_processing",
                        type=str,
                        help="method to process rewards",
                        choices=('clip', 'none'),
                        default='clip')
    # must set network_architecture to custom in order to use a custom architecture
    parser.add_argument(
        "--conv_kernel_shapes",
        type=tuple,
        help=
        "shapes of convnet kernels: ((height, width, in_channels, out_channels), (next layer))"
    )
    # must have same length as conv_kernel_shapes
    parser.add_argument(
        "--conv_strides",
        type=tuple,
        help="connvet strides: ((1, height, width, 1), (next layer))")
    # currently,  you must have at least one dense layer
    parser.add_argument(
        "--dense_layer_shapes",
        type=tuple,
        help="shapes of dense layers: ((in_size, out_size), (next layer))")
    parser.add_argument("--discount_factor",
                        type=float,
                        help="constant to discount future rewards",
                        default=0.99)
    parser.add_argument("--learning_rate",
                        type=float,
                        help="constant to scale parameter updates",
                        default=0.00025)
    parser.add_argument("--optimizer",
                        type=str,
                        help="optimization method for network",
                        choices=('rmsprop', 'graves_rmsprop'),
                        default='rmsprop')
    parser.add_argument("--rmsprop_decay",
                        type=float,
                        help="decay constant for moving average in rmsprop",
                        default=0.95)
    parser.add_argument("--rmsprop_epsilon",
                        type=float,
                        help="constant to stabilize rmsprop",
                        default=0.01)
    # set error_clipping to less than 0 to disable
    parser.add_argument(
        "--error_clipping",
        type=float,
        help="constant at which td-error becomes linear instead of quadratic",
        default=1.0)
    # set gradient clipping to 0 or less to disable.  Currently only works with graves_rmsprop.
    parser.add_argument("--gradient_clip",
                        type=float,
                        help="clip gradients to have the provided L2-norm",
                        default=0)
    parser.add_argument("--target_update_frequency",
                        type=int,
                        help="number of steps between target network updates",
                        default=10000)
    parser.add_argument(
        "--memory_capacity",
        type=int,
        help="max number of experiences to store in experience memory",
        default=1000000)
    parser.add_argument(
        "--batch_size",
        type=int,
        help="number of transitions sampled from memory during learning",
        default=32)
    # must set to custom in order to specify custom architecture
    parser.add_argument("--network_architecture",
                        type=str,
                        help="name of prespecified network architecture",
                        choices=("deepmind_nips", "deepmind_nature, custom"),
                        default="deepmind_nature")
    parser.add_argument("--recording_frequency",
                        type=int,
                        help="number of steps before tensorboard recording",
                        default=50000)

    parser.add_argument("--saving_threshold",
                        type=int,
                        help="min score threshold for saving model.",
                        default=0)

    parser.add_argument("--parallel",
                        help="parallelize acting and learning",
                        action='store_true')
    parser.add_argument(
        "--double_dqn",
        help="use double q-learning algorithm in error target calculation",
        action='store_true')
    args = parser.parse_args()

    if args.network_architecture == 'deepmind_nature':
        args.conv_kernel_shapes = [[8, 8, 4, 32], [4, 4, 32, 64],
                                   [3, 3, 64, 64]]
        args.conv_strides = [[1, 4, 4, 1], [1, 2, 2, 1], [1, 1, 1, 1]]
        args.dense_layer_shapes = [[3136, 512]]
    elif args.network_architecture == 'deepmind_nips':
        args.conv_kernel_shapes = [[8, 8, 4, 16], [4, 4, 16, 32]]
        args.conv_strides = [[1, 4, 4, 1], [1, 2, 2, 1]]
        args.dense_layer_shapes = [[2592, 256]]

    if not args.watch:
        train_stats = RecordStats(args, False)
        test_stats = RecordStats(args, True)
        training_emulator = AtariEmulator(args)
        testing_emulator = AtariEmulator(args)
        num_actions = len(training_emulator.get_possible_actions())
        experience_memory = ExperienceMemory(args, num_actions)

        q_network = None
        agent = None
        if args.parallel:
            q_network = ParallelQNetwork(args, num_actions)
            agent = ParallelDQNAgent(args, q_network, training_emulator,
                                     experience_memory, num_actions,
                                     train_stats)
        else:
            q_network = QNetwork(args, num_actions)
            agent = DQNAgent(args, q_network, training_emulator,
                             experience_memory, num_actions, train_stats)

        experiment.run_experiment(args, agent, testing_emulator, test_stats)

    else:
        testing_emulator = AtariEmulator(args)
        num_actions = len(testing_emulator.get_possible_actions())
        q_network = QNetwork(args, num_actions)
        agent = DQNAgent(args, q_network, None, None, num_actions, None)
        experiment.evaluate_agent(args, agent, testing_emulator, None)
Example #6
def create_environment():
    ale_int = ALEInterface()
    ale_int.loadROM(str.encode(BIN))
    num_actions = len(ale_int.getMinimalActionSet())
    return AtariEmulator(BIN), num_actions
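A hedged usage sketch for this factory. It assumes AtariEmulator exposes get_initial_state() and next(action), and that actions are passed as one-hot vectors, as suggested by the remote calls in Example #7.

import numpy as np

emulator, num_actions = create_environment()
state = emulator.get_initial_state()                     # assumed emulator API (see Example #7)
for _ in range(100):
    action = np.zeros(num_actions, dtype=np.float32)     # one-hot action vector, as in Example #7
    action[np.random.randint(num_actions)] = 1.0
    state, reward, episode_over = emulator.next(action)  # assumed to mirror .next.remote()
    if episode_over:
        state = emulator.get_initial_state()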
Example #7
if __name__ == "__main__":
    args = get_arg_parser().parse_args()

    from atari_emulator import AtariEmulator
    from ale_python_interface import ALEInterface

    filename = args.rom_path + "/" + args.game + ".bin"
    ale_int = ALEInterface()
    ale_int.loadROM(str.encode(filename))
    num_actions = len(ale_int.getMinimalActionSet())

    args.num_actions = num_actions
    args.random_seed = 3

    ray.init()
    create_environment = lambda i: AtariEmulator.remote(i, args)

    emulators = np.asarray([create_environment(i) for i in range(4)])
    variables = [
        # batched initial states, one row per emulator (refreshed below when an episode ends)
        np.asarray([ray.get(emulator.get_initial_state.remote())
                    for emulator in emulators], dtype=np.uint8),
        np.zeros(4, dtype=np.float32),
        np.asarray([False] * 4, dtype=np.float32),
        # per-emulator action vectors, consumed by the step loop below
        np.zeros((4, num_actions), dtype=np.float32),
    ]

    for step in range(10):
        for i, (emulator, action) in enumerate(zip(emulators, variables[-1])):
            new_s, reward, episode_over = ray.get(emulator.next.remote(action))
            if episode_over:
                variables[0][i] = ray.get(emulator.get_initial_state.remote())
            else: