Example #1
    def __init__(self, qec_table, ec_discount, num_actions, epsilon_start,
                 epsilon_min, epsilon_decay, exp_pref, rng):

        self.qec_table = qec_table
        self.ec_discount = ec_discount
        self.num_actions = num_actions
        self.epsilon_start = epsilon_start
        self.epsilon_min = epsilon_min
        self.epsilon_decay = epsilon_decay
        self.exp_pref = exp_pref
        self.rng = rng

        self.trace_list = EC_functions.TraceRecorder()

        self.epsilon = self.epsilon_start
        if self.epsilon_decay != 0:
            self.epsilon_rate = ((self.epsilon_start - self.epsilon_min) /
                                 self.epsilon_decay)
        else:
            self.epsilon_rate = 0

        # CREATE A FOLDER TO HOLD RESULTS
        time_str = time.strftime("_%m-%d-%H-%M_", time.gmtime())
        self.exp_dir = self.exp_pref + time_str + \
                       "{}".format(self.ec_discount).replace(".", "p")

        try:
            os.stat(self.exp_dir)
        except OSError:
            os.makedirs(self.exp_dir)

        self._open_results_file()

        self.step_counter = None
        self.episode_reward = None

        self.total_reward = 0.
        self.total_episodes = 0

        self.start_time = None

        self.last_img = None
        self.last_action = None

        self.steps_sec_ema = 0.
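The constructor above only precomputes the linear annealing rate for epsilon (epsilon_rate = (epsilon_start - epsilon_min) / epsilon_decay). A minimal sketch of how such a schedule is typically applied once per training step; the helper name _anneal_epsilon is hypothetical and not part of the original class:

    # Hypothetical helper, for illustration only: anneal epsilon linearly
    # from epsilon_start down to epsilon_min using the precomputed rate.
    def _anneal_epsilon(self):
        if self.epsilon > self.epsilon_min:
            self.epsilon = max(self.epsilon_min,
                               self.epsilon - self.epsilon_rate)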
Example #2
def launch(args, defaults, description):
    """
    Execute a complete training run.
    """

    logging.basicConfig(level=logging.INFO)
    parameters = process_args(args, defaults, description)

    if parameters.rom.endswith('.bin'):
        rom = parameters.rom
    else:
        rom = "%s.bin" % parameters.rom
    full_rom_path = os.path.join(defaults.BASE_ROM_PATH, rom)

    if parameters.deterministic:
        rng = np.random.RandomState(123456)
    else:
        rng = np.random.RandomState()

    ale = ale_python_interface.ALEInterface()
    ale.setInt('random_seed', rng.randint(1000))

    if parameters.display_screen:
        import sys
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False)  # Sound doesn't work on OSX

    ale.setBool('display_screen', parameters.display_screen)
    ale.setFloat('repeat_action_probability',
                 parameters.repeat_action_probability)

    ale.loadROM(full_rom_path)

    num_actions = len(ale.getMinimalActionSet())

    agent = None

    if parameters.use_episodic_control:
        if parameters.qec_table is None:
            qec_table = EC_functions.QECTable(
                parameters.knn, parameters.state_dimension,
                parameters.projection_type,
                defaults.RESIZED_WIDTH * defaults.RESIZED_HEIGHT,
                parameters.buffer_size, num_actions, rng)
        else:
            handle = open(parameters.qec_table, 'r')
            qec_table = pickle.load(handle)

        agent = EC_agent.EpisodicControl(qec_table, parameters.ec_discount,
                                         num_actions, parameters.epsilon_start,
                                         parameters.epsilon_min,
                                         parameters.epsilon_decay,
                                         parameters.experiment_prefix, rng)

    experiment = ale_experiment.ALEExperiment(
        ale, agent, defaults.RESIZED_WIDTH, defaults.RESIZED_HEIGHT,
        parameters.resize_method, parameters.epochs,
        parameters.steps_per_epoch, parameters.steps_per_test,
        parameters.frame_skip, parameters.death_ends_episode,
        parameters.max_start_nullops, rng)

    experiment.run()
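A minimal sketch of how a launch function like the one above is usually driven from a run script. The Defaults container and its values here are assumptions for illustration; of its attributes, only BASE_ROM_PATH, RESIZED_WIDTH and RESIZED_HEIGHT are read directly by the code above (process_args may consult others).

import sys

class Defaults(object):
    # Hypothetical values for illustration; these are the attribute names
    # launch() reads directly.
    BASE_ROM_PATH = "roms/"
    RESIZED_WIDTH = 84
    RESIZED_HEIGHT = 84

if __name__ == '__main__':
    launch(sys.argv[1:], Defaults, "Episodic control training run")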
Example #3
def launch(args, defaults, description):
    """
    Execute a complete training run.
    """

    logging.basicConfig(level=logging.INFO)
    parameters = process_args(args, defaults, description)

    if parameters.rom.endswith('.bin'):
        rom = parameters.rom
    else:
        rom = "%s.bin" % parameters.rom
    full_rom_path = os.path.join(defaults.BASE_ROM_PATH, rom)

    if parameters.deterministic:
        rng = np.random.RandomState(123456)
    else:
        rng = np.random.RandomState()

    ale = ale_python_interface.ALEInterface()
    ale.setInt('random_seed', rng.randint(1000))

    # FOR VISUALIZATION
    USE_SDL = False
    if parameters.display_screen:
        if USE_SDL:
            import sys
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
                ale.setBool('sound', False)  # Sound doesn't work on OSX

    ale.setBool('display_screen', parameters.display_screen)
    ale.setFloat('repeat_action_probability',
                 parameters.repeat_action_probability)

    ale.loadROM(full_rom_path)

    num_actions = len(ale.getMinimalActionSet())

    agent = None

    if parameters.use_episodic_control:
        if parameters.qec_table is None:
            qec_table = EC_functions.QECTable(
                parameters.knn, parameters.state_dimension,
                parameters.projection_type,
                defaults.RESIZED_WIDTH * defaults.RESIZED_HEIGHT,
                parameters.buffer_size, num_actions, rng)
        else:
            handle = open(parameters.qec_table, 'r')
            qec_table = cPickle.load(handle)

            # If this doesn't work, load using the function below:
            # def try_to_load_as_pickled_object_or_None(filepath):
            #     """
            #     This is a defensive way to write pickle.load, allowing for very large files on all platforms
            #     """
            #     max_bytes = 2 ** 31 - 1
            #     try:
            #         input_size = os.path.getsize(filepath)
            #         bytes_in = bytearray(0)
            #         with open(filepath, 'rb') as f_in:
            #             for _ in range(0, input_size, max_bytes):
            #                 bytes_in += f_in.read(max_bytes)
            #         obj = cPickle.loads(bytes_in)
            #     except:
            #         return None
            #     return obj
            # qec_table = try_to_load_as_pickled_object_or_None(parameters.qec_table)

        agent = IBL_agent.EpisodicControl(qec_table, parameters.ec_discount,
                                          num_actions,
                                          parameters.epsilon_start,
                                          parameters.epsilon_min,
                                          parameters.epsilon_decay,
                                          parameters.experiment_prefix, rng)

    experiment = ale_experiment.ALEExperiment(
        ale, agent, defaults.RESIZED_WIDTH, defaults.RESIZED_HEIGHT,
        parameters.resize_method, parameters.epochs,
        parameters.steps_per_epoch, parameters.steps_per_test,
        parameters.frame_skip, parameters.death_ends_episode,
        parameters.max_start_nullops, rng)

    experiment.run()
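The commented-out fallback loader above expects a file path but is invoked with an already-open handle. A cleaned-up, runnable version of the same chunked-read idea (the helper name is kept from the comment; the narrowed exception list is an assumption):

import os
import cPickle

def try_to_load_as_pickled_object_or_None(filepath):
    """Defensively unpickle a possibly very large file by reading it in
    chunks of at most 2**31 - 1 bytes, working around platform limits on a
    single read() call. Returns None if anything goes wrong."""
    max_bytes = 2 ** 31 - 1
    try:
        input_size = os.path.getsize(filepath)
        bytes_in = bytearray(0)
        with open(filepath, 'rb') as f_in:
            for _ in range(0, input_size, max_bytes):
                bytes_in += f_in.read(max_bytes)
        return cPickle.loads(bytes(bytes_in))
    except (IOError, OSError, EOFError, cPickle.UnpicklingError):
        return None

# Usage sketch: pass the path, not an open handle.
# qec_table = try_to_load_as_pickled_object_or_None(parameters.qec_table)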
Example #4
    def __init__(self, qec_table, ec_discount, num_actions, epsilon_start, epsilon_min, epsilon_decay, exp_pref, rng):
        self.qec_table = qec_table
        self.ec_discount = ec_discount
        self.num_actions = num_actions
        self.epsilon_start = epsilon_start
        self.epsilon_min = epsilon_min
        self.epsilon_decay = epsilon_decay
        self.exp_pref = exp_pref
        self.rng = rng

        self.trace_list = EC_functions.TraceRecorder()

        self.epsilon = self.epsilon_start
        if self.epsilon_decay != 0:
            self.epsilon_rate = ((self.epsilon_start - self.epsilon_min) /
                                 self.epsilon_decay)
        else:
            self.epsilon_rate = 0

        # CREATE IBL AGENT
        # Older
        # attrs = OrderedDict({'key' + str(i): i for i in range(0, 63)}) # Maybe 64!
        # self.DM = Agent('DM', attrs.keys())

        # Newest
        self.DM = Agent('DM',['state']) # no need to have anything here at the beginning
        self.DM.defaultUtility = 1000 # Instead you can prepopulate with the random initial actions in the while loop in ale_experiment.py

        def eucl(v1, v2):
            v1 = np.array(v1)
            v2 = np.array(v2)
            return np.linalg.norm(v1 - v2)
        # Init Partial Matching
        self.DM.similarity('state', eucl)
        self.DM.mismatchPenalty = 0.5
        # Create generic Situation and SituationDecision objects. They will be updated later on.
        self.situation = Situation(state=()) # state attribute is empty TUPLE
        self.situationdecisions = [SituationDecision(str(action), self.situation) for action in range(self.num_actions)]

        # You might need to pre-populate with a few instances

        # CREATE A FOLDER TO HOLD RESULTS
        time_str = time.strftime("_%m-%d-%H-%M_", time.gmtime())
        self.exp_dir = self.exp_pref + time_str + \
                       "{}".format(self.ec_discount).replace(".", "p")

        try:
            os.stat(self.exp_dir)
        except OSError:
            os.makedirs(self.exp_dir)

        self._open_results_file()

        self.step_counter = None
        self.episode_reward = None

        self.total_reward = 0.
        self.total_episodes = 0

        self.start_time = None

        self.last_img = None
        self.last_action = None

        self.steps_sec_ema = 0.
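The eucl function and mismatchPenalty above configure partial matching on the 'state' slot. The sketch below is a plain-NumPy illustration of the general idea (blend stored utilities with weights that fall off as a stored state gets farther from the query); the weighting scheme and names are illustrative assumptions, not the PyIBL API.

import numpy as np

def blended_value(query_state, stored_states, stored_utilities,
                  mismatch_penalty=0.5):
    # Score each stored instance by its (penalised) distance to the query,
    # turn the scores into normalised weights, and blend the stored utilities.
    dists = np.array([np.linalg.norm(np.asarray(query_state) - np.asarray(s))
                      for s in stored_states])
    scores = -mismatch_penalty * dists
    weights = np.exp(scores - scores.max())
    weights /= weights.sum()
    return float(np.dot(weights, stored_utilities))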
Example #5
    def __init__(self, session, args):
        self.n_input = args.input_size  # Number of features in each observation
        self.num_obs = 2  # Number of observations in each state
        self.n_actions = args.num_actions  # Number of output q_values
        self.discount = args.discount  # Discount factor
        self.epsilon = args.epsilon  # Epsilon
        self.learning_rate = args.learning_rate
        self.beta = args.beta
        self.delta = 0.01
        self.number_nn = 50
        self.layer_sizes = [self.n_input] + args.layer_sizes
        self.session = session

        self.memory = ReplayMemory(args)

        self.old_way = False

        # Tensorflow variables:

        # Model for Embeddings
        self.state = tf.placeholder("float", [None, self.n_input])
        self.action = tf.placeholder(tf.int64, [None])
        with tf.variable_scope('embedding'):
            self.state_embeddings, self.weights = self.network(
                self.state, self.layer_sizes)

        # DNDs
        self.DNDs = []
        for a in xrange(self.n_actions):
            new_DND = EC_functions.LRU_KNN(
                5000,
                self.state_embeddings.get_shape()[-1])
            self.DNDs.append(new_DND)

        # DND Calculations (everything from here on needs these placeholders filled)
        if self.old_way:
            self.dnd_embeddings = tf.placeholder(
                "float",
                [None, None, self.state_embeddings.get_shape()[-1]],
                name="dnd_embeddings")
            self.dnd_values = tf.placeholder("float", [None, None],
                                             name="dnd_values")
        else:  # Call on DND directly
            embs_and_values = tf.py_func(self.get_nearest_neighbours,
                                         [self.state_embeddings, self.action],
                                         [tf.float64, tf.float64])
            self.dnd_embeddings = tf.to_float(embs_and_values[0])
            self.dnd_values = tf.to_float(embs_and_values[1])

        weightings = 1.0 / (tf.reduce_sum(tf.square(
            self.dnd_embeddings - tf.expand_dims(self.state_embeddings, 1)),
                                          axis=2) + self.delta)
        normalised_weightings = weightings / tf.reduce_sum(
            weightings, axis=1, keep_dims=True)  # keep dims for broadcasting
        if self.beta == 0:
            self.pred_q = tf.reduce_sum(self.dnd_values *
                                        normalised_weightings,
                                        axis=1)
            #self.pred_q = tf.reduce_mean(self.dnd_values, axis=1)
        else:
            self.pred_q = tf.log(
                tf.reduce_sum(tf.exp(self.beta * self.dnd_values) *
                              normalised_weightings,
                              axis=1))

        # Loss Function
        self.target_q = tf.placeholder("float", [None])
        self.td_err = self.target_q - self.pred_q
        total_loss = tf.reduce_sum(tf.square(self.td_err))

        self.optim = tf.train.AdamOptimizer(
            self.learning_rate).minimize(total_loss)
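The block above is the differentiable-neural-dictionary read: inverse squared-distance kernel weights over the retrieved neighbours, normalised, then either a plain weighted sum of the stored values or, when beta != 0, a log of an exp(beta * value) blend. A NumPy sketch of the same computation for a single query embedding:

import numpy as np

def dnd_read(query_embedding, neighbour_embeddings, neighbour_values,
             delta=0.01, beta=0.0):
    # query_embedding: (d,), neighbour_embeddings: (k, d), neighbour_values: (k,)
    sq_dists = np.sum((neighbour_embeddings - query_embedding) ** 2, axis=1)
    weights = 1.0 / (sq_dists + delta)    # kernel weightings
    weights = weights / weights.sum()     # normalise
    if beta == 0:
        return float(np.dot(weights, neighbour_values))
    return float(np.log(np.sum(np.exp(beta * neighbour_values) * weights)))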
def launch(args, defaults, description):
    """
    Execute a complete training run.
    """

    logging.basicConfig(level=logging.INFO)
    parameters = process_args(args, defaults, description)

    if parameters.rom.endswith('.bin'):
        rom = parameters.rom
    else:
        rom = "%s.bin" % parameters.rom
    full_rom_path = os.path.join(defaults.BASE_ROM_PATH, rom)

    if parameters.deterministic:
        rng = np.random.RandomState(123456)
    else:
        rng = np.random.RandomState()

    if parameters.cudnn_deterministic:
        theano.config.dnn.conv.algo_bwd = 'deterministic'

    ale = ale_python_interface.ALEInterface()
    ale.setInt('random_seed', rng.randint(1000))

    if parameters.display_screen:
        import sys
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False) # Sound doesn't work on OSX

    ale.setBool('display_screen', parameters.display_screen)
    ale.setFloat('repeat_action_probability',
                 parameters.repeat_action_probability)

    ale.loadROM(full_rom_path)

    num_actions = len(ale.getMinimalActionSet())

    agent = None

    if parameters.method == 'ec_dqn':
        if parameters.nn_file is None:
            network = q_network.DeepQLearner(defaults.RESIZED_WIDTH,
                                             defaults.RESIZED_HEIGHT,
                                             num_actions,
                                             parameters.phi_length,
                                             parameters.discount,
                                             parameters.learning_rate,
                                             parameters.rms_decay,
                                             parameters.rms_epsilon,
                                             parameters.momentum,
                                             parameters.clip_delta,
                                             parameters.freeze_interval,
                                             parameters.batch_size,
                                             parameters.network_type,
                                             parameters.update_rule,
                                             parameters.batch_accumulator,
                                             rng, use_ec=True, double=parameters.double_dqn)
        else:
            handle = open(parameters.nn_file, 'r')
            network = cPickle.load(handle)

        if parameters.qec_table is None:
            qec_table = EC_functions.QECTable(parameters.knn,
                                              parameters.state_dimension,
                                              parameters.projection_type,
                                              defaults.RESIZED_WIDTH*defaults.RESIZED_HEIGHT,
                                              parameters.buffer_size,
                                              num_actions,
                                              rng,
                                              parameters.rebuild_knn_frequency)
        else:
            handle = open(parameters.qec_table, 'r')
            qec_table = cPickle.load(handle)

        agent = ale_agents.EC_DQN(network,
                                  qec_table,
                                  parameters.epsilon_start,
                                  parameters.epsilon_min,
                                  parameters.epsilon_decay,
                                  parameters.replay_memory_size,
                                  parameters.experiment_prefix,
                                  parameters.replay_start_size,
                                  parameters.update_frequency,
                                  parameters.ec_discount,
                                  num_actions,
                                  parameters.ec_testing,
                                  rng)

    if parameters.method == 'dqn_episodic_memory1':
        if parameters.nn_file is None:
            network = q_network.DeepQLearner(defaults.RESIZED_WIDTH,
                                             defaults.RESIZED_HEIGHT,
                                             num_actions,
                                             parameters.phi_length,
                                             parameters.discount,
                                             parameters.learning_rate,
                                             parameters.rms_decay,
                                             parameters.rms_epsilon,
                                             parameters.momentum,
                                             parameters.clip_delta,
                                             parameters.freeze_interval,
                                             parameters.batch_size,
                                             parameters.network_type,
                                             parameters.update_rule,
                                             parameters.batch_accumulator,
                                             rng, use_episodic_mem=True, double=parameters.double_dqn)
        else:
            handle = open(parameters.nn_file, 'r')
            network = cPickle.load(handle)

        if parameters.qec_table is None:
            qec_table = EC_functions.QECTable(parameters.knn,
                                              parameters.state_dimension,
                                              parameters.projection_type,
                                              defaults.RESIZED_WIDTH*defaults.RESIZED_HEIGHT,
                                              parameters.buffer_size,
                                              num_actions,
                                              rng,
                                              parameters.rebuild_knn_frequency)
        else:
            handle = open(parameters.qec_table, 'r')
            qec_table = cPickle.load(handle)

        agent = ale_agents.NeuralNetworkEpisodicMemory1(network,
                                                        qec_table,
                                                        parameters.epsilon_start,
                                                        parameters.epsilon_min,
                                                        parameters.epsilon_decay,
                                                        parameters.replay_memory_size,
                                                        parameters.experiment_prefix,
                                                        parameters.replay_start_size,
                                                        parameters.update_frequency,
                                                        parameters.ec_discount,
                                                        num_actions,
                                                        parameters.ec_testing,
                                                        rng)
    if parameters.method == 'dqn_episodic_memory2':
        if parameters.nn_file is None:
            network = q_network.DeepQLearner(defaults.RESIZED_WIDTH,
                                             defaults.RESIZED_HEIGHT,
                                             num_actions,
                                             parameters.phi_length,
                                             parameters.discount,
                                             parameters.learning_rate,
                                             parameters.rms_decay,
                                             parameters.rms_epsilon,
                                             parameters.momentum,
                                             parameters.clip_delta,
                                             parameters.freeze_interval,
                                             parameters.batch_size,
                                             parameters.network_type,
                                             parameters.update_rule,
                                             parameters.batch_accumulator,
                                             rng, use_episodic_mem=True, double=parameters.double_dqn)
        else:
            handle = open(parameters.nn_file, 'r')
            network = cPickle.load(handle)

        if parameters.qec_table is None:
            qec_table = EC_functions.QECTable(parameters.knn,
                                              parameters.state_dimension,
                                              parameters.projection_type,
                                              defaults.RESIZED_WIDTH*defaults.RESIZED_HEIGHT,
                                              parameters.buffer_size,
                                              num_actions,
                                              rng,
                                              parameters.rebuild_knn_frequency)
        else:
            handle = open(parameters.qec_table, 'r')
            qec_table = cPickle.load(handle)

    if parameters.method == 'dqn_episodic_memory3':
        if parameters.nn_file is None:
            network = q_network.DeepQLearner(defaults.RESIZED_WIDTH,
                                             defaults.RESIZED_HEIGHT,
                                             num_actions,
                                             parameters.phi_length,
                                             parameters.discount,
                                             parameters.learning_rate,
                                             parameters.rms_decay,
                                             parameters.rms_epsilon,
                                             parameters.momentum,
                                             parameters.clip_delta,
                                             parameters.freeze_interval,
                                             parameters.batch_size,
                                             parameters.network_type,
                                             parameters.update_rule,
                                             parameters.batch_accumulator,
                                             rng, use_episodic_mem=True, double=parameters.double_dqn)
        else:
            handle = open(parameters.nn_file, 'r')
            network = cPickle.load(handle)

        if parameters.qec_table is None:
            qec_table = EC_functions.LshHash(parameters.state_dimension,
                                             defaults.RESIZED_WIDTH*defaults.RESIZED_HEIGHT,
                                             parameters.buffer_size,
                                             rng)
        else:
            handle = open(parameters.qec_table, 'r')
            qec_table = cPickle.load(handle)

        agent = ale_agents.NeuralNetworkEpisodicMemory3(network,
                                                        qec_table,
                                                        parameters.epsilon_start,
                                                        parameters.epsilon_min,
                                                        parameters.epsilon_decay,
                                                        parameters.replay_memory_size,
                                                        parameters.experiment_prefix,
                                                        parameters.replay_start_size,
                                                        parameters.update_frequency,
                                                        parameters.ec_discount,
                                                        num_actions,
                                                        parameters.ec_testing,
                                                        rng)

    if parameters.method == 'dqn':
        if parameters.nn_file is None:
            network = q_network.DeepQLearner(defaults.RESIZED_WIDTH,
                                             defaults.RESIZED_HEIGHT,
                                             num_actions,
                                             parameters.phi_length,
                                             parameters.discount,
                                             parameters.learning_rate,
                                             parameters.rms_decay,
                                             parameters.rms_epsilon,
                                             parameters.momentum,
                                             parameters.clip_delta,
                                             parameters.freeze_interval,
                                             parameters.batch_size,
                                             parameters.network_type,
                                             parameters.update_rule,
                                             parameters.batch_accumulator,
                                             rng, double=parameters.double_dqn)
        else:
            handle = open(parameters.nn_file, 'r')
            network = cPickle.load(handle)

        agent = ale_agents.NeuralAgent(network,
                                       parameters.epsilon_start,
                                       parameters.epsilon_min,
                                       parameters.epsilon_decay,
                                       parameters.replay_memory_size,
                                       parameters.experiment_prefix,
                                       parameters.replay_start_size,
                                       parameters.update_frequency,
                                       rng)

    if parameters.method == 'episodic_control':
        if parameters.qec_table is None:
            qec_table = EC_functions.QECTable(parameters.knn,
                                              parameters.state_dimension,
                                              parameters.projection_type,
                                              defaults.RESIZED_WIDTH*defaults.RESIZED_HEIGHT,
                                              parameters.buffer_size,
                                              num_actions,
                                              rng,
                                              parameters.rebuild_knn_frequency)
        else:
            handle = open(parameters.qec_table, 'r')
            qec_table = cPickle.load(handle)

        agent = ale_agents.EpisodicControl(qec_table,
                                           parameters.ec_discount,
                                           num_actions,
                                           parameters.epsilon_start,
                                           parameters.epsilon_min,
                                           parameters.epsilon_decay,
                                           parameters.experiment_prefix,
                                           parameters.ec_testing,
                                           rng)

    experiment = ale_experiment.ALEExperiment(ale, agent,
                                              defaults.RESIZED_WIDTH,
                                              defaults.RESIZED_HEIGHT,
                                              parameters.resize_method,
                                              parameters.epochs,
                                              parameters.steps_per_epoch,
                                              parameters.steps_per_test,
                                              parameters.frame_skip,
                                              parameters.death_ends_episode,
                                              parameters.max_start_nullops,
                                              rng)

    experiment.run()