Exemplo n.º 1
0
    def __init__(self, states_spec, actions_spec, batched_observe, batch_size,
                 memory, first_update, update_frequency, repeat_update):
        """
        Store the memory/update settings, initialize the parent agent and build the memory.

        Args:
            states_spec: Dict containing at least one state definition. In the case of a single state,
               keys `shape` and `type` are necessary. For multiple states, pass a dict of dicts where each state
               is a dict itself with a unique name as its key.
            actions_spec: Dict containing at least one action definition. Actions have types and either `num_actions`
                for discrete actions or a `shape` for continuous actions. Consult documentation and tests for more.
            batched_observe: Optional int specifying how many observe calls are batched into one session run.
                Without batching, throughput will be lower because every `observe` triggers a session invocation to
                update rewards in the graph.
            batch_size: Int specifying batch size used to sample from memory. Should be smaller than memory size.
            memory: Dict describing memory via `type` (e.g. `replay`) and `capacity`.
            first_update: Int describing at which time step the first update is performed. Should be larger
                than batch size.
            update_frequency: Int specifying number of observe steps to perform until an update is executed.
            repeat_update: Int specifying how many update steps are performed per update, where each update step implies
                sampling a batch from the memory and passing it to the model.
        """
        # These attributes are assigned before the parent constructor runs.
        # NOTE(review): presumably the parent constructor reads them -- confirm before reordering.
        self.memory_spec = memory
        self.batch_size = batch_size
        self.first_update = first_update
        self.update_frequency = update_frequency
        self.repeat_update = repeat_update

        super(MemoryAgent, self).__init__(
            states_spec=states_spec,
            actions_spec=actions_spec,
            batched_observe=batched_observe
        )

        # The specs are read back from the instance rather than taken from the arguments.
        memory_kwargs = {
            'states_spec': self.states_spec,
            'actions_spec': self.actions_spec,
        }
        self.memory = Memory.from_spec(spec=self.memory_spec, kwargs=memory_kwargs)
Exemplo n.º 2
0
    def initialize(self, custom_getter):
        """
        Run the parent initialization, then create the memory, the optimizer and the
        TensorFlow template functions of this model.

        Args:
            custom_getter: Passed to the parent `initialize` and used as `custom_getter_`
                for every template created here.
        """
        super(MemoryModel, self).initialize(custom_getter)

        def make_template(name, func):
            # Every template below shares the same custom getter.
            return tf.make_template(name_=name, func_=func, custom_getter_=custom_getter)

        # Memory
        memory_kwargs = {
            'states': self.states_spec,
            'internals': self.internals_spec,
            'actions': self.actions_spec,
            'summary_labels': self.summary_labels,
        }
        self.memory = Memory.from_spec(spec=self.memory_spec, kwargs=memory_kwargs)

        # Optimizer
        optimizer_kwargs = {'summary_labels': self.summary_labels}
        self.optimizer = Optimizer.from_spec(spec=self.optimizer_spec, kwargs=optimizer_kwargs)

        # TensorFlow functions (created in the same order as before)
        self.fn_discounted_cumulative_reward = make_template(
            'discounted-cumulative-reward', self.tf_discounted_cumulative_reward
        )
        self.fn_reference = make_template('reference', self.tf_reference)
        self.fn_loss_per_instance = make_template('loss-per-instance', self.tf_loss_per_instance)
        self.fn_regularization_losses = make_template(
            'regularization-losses', self.tf_regularization_losses
        )
        self.fn_loss = make_template('loss', self.tf_loss)
        self.fn_optimization = make_template('optimization', self.tf_optimization)
        self.fn_import_experience = make_template('import-experience', self.tf_import_experience)
Exemplo n.º 3
0
    def __init__(self, config, model=None):
        """
        Apply the `MemoryAgent` default configuration, initialize the parent agent and
        read the memory and update settings from `config`.

        Args:
            config: Configuration object; `MemoryAgent.default_config` is applied to it via
                `config.default` before any value is read.
            model: Forwarded unchanged to the parent constructor (defaults to None).
        """
        config.default(MemoryAgent.default_config)
        super(MemoryAgent, self).__init__(config, model)

        self.batch_size = config.batch_size

        # Build the memory from its own config section plus capacity and state/action configs.
        memory_config = config.memory
        memory_kwargs = {
            'capacity': config.memory_capacity,
            'states_config': config.states,
            'actions_config': config.actions,
        }
        self.memory = Memory.from_config(config=memory_config, kwargs=memory_kwargs)

        self.update_frequency = config.update_frequency
        self.first_update = config.first_update
        self.repeat_update = config.repeat_update
Exemplo n.º 4
0
    def setup_components_and_tf_funcs(self, custom_getter=None):
        """
        Constructs the memory and the optimizer objects.
        Generates and stores all template functions.

        Args:
            custom_getter: Handed to the parent implementation; the value the parent returns
                is the one used for the templates created here.

        Returns:
            The custom getter returned by the parent implementation.
        """
        custom_getter = super(MemoryModel, self).setup_components_and_tf_funcs(custom_getter)

        def make_template(name, func):
            # Every template below shares the custom getter obtained from the parent.
            return tf.make_template(name_=name, func_=func, custom_getter_=custom_getter)

        # Memory
        memory_kwargs = {
            'states': self.states_spec,
            'internals': self.internals_spec,
            'actions': self.actions_spec,
            'summary_labels': self.summary_labels,
        }
        self.memory = Memory.from_spec(spec=self.memory_spec, kwargs=memory_kwargs)

        # Optimizer
        optimizer_kwargs = {'summary_labels': self.summary_labels}
        self.optimizer = Optimizer.from_spec(spec=self.optimizer_spec, kwargs=optimizer_kwargs)

        # TensorFlow functions (created in the same order as before)
        self.fn_discounted_cumulative_reward = make_template(
            'discounted-cumulative-reward', self.tf_discounted_cumulative_reward
        )
        self.fn_reference = make_template('reference', self.tf_reference)
        self.fn_loss_per_instance = make_template('loss-per-instance', self.tf_loss_per_instance)
        self.fn_regularization_losses = make_template(
            'regularization-losses', self.tf_regularization_losses
        )
        self.fn_loss = make_template('loss', self.tf_loss)
        self.fn_optimization = make_template('optimization', self.tf_optimization)
        self.fn_import_experience = make_template('import-experience', self.tf_import_experience)

        return custom_getter
Exemplo n.º 5
0
    def __init__(self, states_spec, actions_spec, config):
        """
        Apply the `MemoryAgent` default configuration, copy the memory/update settings from
        `config`, initialize the parent agent and build the memory.

        Args:
            states_spec: Forwarded to the parent constructor.
            actions_spec: Forwarded to the parent constructor.
            config: Configuration object providing `batch_size`, `memory_capacity`,
                `update_frequency`, `first_update`, `repeat_update` and `memory`.
        """
        config.default(MemoryAgent.default_config)

        # These attributes are assigned before the parent constructor runs.
        # NOTE(review): presumably the parent constructor reads them -- confirm before reordering.
        self.batch_size = config.batch_size
        self.memory_capacity = config.memory_capacity
        self.update_frequency = config.update_frequency
        self.first_update = config.first_update
        self.repeat_update = config.repeat_update

        super(MemoryAgent, self).__init__(states_spec, actions_spec, config)

        # The specs are read back from the instance rather than taken from the arguments.
        memory_spec = config.memory
        memory_kwargs = {
            'capacity': self.memory_capacity,
            'states_spec': self.states_spec,
            'actions_spec': self.actions_spec,
        }
        self.memory = Memory.from_spec(spec=memory_spec, kwargs=memory_kwargs)
Exemplo n.º 6
0
    def __init__(self, states_spec, actions_spec, config):
        """
        Copy the memory/update settings from `config`, initialize the parent agent and
        build the memory from the stored memory spec.

        Args:
            states_spec: Forwarded to the parent constructor.
            actions_spec: Forwarded to the parent constructor.
            config: Configuration object providing `memory`, `batch_size`, `first_update`,
                `update_frequency` and `repeat_update`; also forwarded to the parent.
        """
        # These attributes are assigned before the parent constructor runs.
        # NOTE(review): presumably the parent constructor reads them -- confirm before reordering.
        self.memory_spec = config.memory
        self.batch_size = config.batch_size
        self.first_update = config.first_update
        self.update_frequency = config.update_frequency
        self.repeat_update = config.repeat_update

        super(MemoryAgent, self).__init__(
            states_spec=states_spec,
            actions_spec=actions_spec,
            config=config
        )

        # The specs are read back from the instance rather than taken from the arguments.
        memory_kwargs = {
            'states_spec': self.states_spec,
            'actions_spec': self.actions_spec,
        }
        self.memory = Memory.from_spec(spec=self.memory_spec, kwargs=memory_kwargs)
Exemplo n.º 7
0
    def __init__(self, states_spec, actions_spec, preprocessing, exploration,
                 reward_preprocessing, batched_observe, batch_size, memory,
                 first_update, update_frequency, repeat_update):
        """
        Store the memory/update settings, initialize the parent agent and build the memory.

        Args:
            states_spec: Forwarded to the parent constructor.
            actions_spec: Forwarded to the parent constructor.
            preprocessing: Forwarded to the parent constructor.
            exploration: Forwarded to the parent constructor.
            reward_preprocessing: Forwarded to the parent constructor.
            batched_observe: Forwarded to the parent constructor.
            batch_size: Stored as `self.batch_size`.
            memory: Memory spec; stored as `self.memory_spec` and passed to `Memory.from_spec`.
            first_update: Stored as `self.first_update`.
            update_frequency: Stored as `self.update_frequency`.
            repeat_update: Stored as `self.repeat_update`.
        """
        # These attributes are assigned before the parent constructor runs.
        # NOTE(review): presumably the parent constructor reads them -- confirm before reordering.
        self.memory_spec = memory
        self.batch_size = batch_size
        self.first_update = first_update
        self.update_frequency = update_frequency
        self.repeat_update = repeat_update

        super(MemoryAgent, self).__init__(
            states_spec=states_spec,
            actions_spec=actions_spec,
            preprocessing=preprocessing,
            exploration=exploration,
            reward_preprocessing=reward_preprocessing,
            batched_observe=batched_observe
        )

        # The specs are read back from the instance rather than taken from the arguments.
        memory_kwargs = {
            'states_spec': self.states_spec,
            'actions_spec': self.actions_spec,
        }
        self.memory = Memory.from_spec(spec=self.memory_spec, kwargs=memory_kwargs)
Exemplo n.º 8
0
    def __init__(
        self,
        states_spec,
        actions_spec,
        batched_observe=1000,
        scope='memory_agent',
        # parameters specific to LearningAgents
        summary_spec=None,
        network_spec=None,
        discount=0.99,
        device=None,
        session_config=None,
        saver_spec=None,
        distributed_spec=None,
        optimizer=None,
        variable_noise=None,
        states_preprocessing_spec=None,
        explorations_spec=None,
        reward_preprocessing_spec=None,
        distributions_spec=None,
        entropy_regularization=None,
        # parameters specific to MemoryAgents
        batch_size=1000,
        memory=None,
        first_update=10000,
        update_frequency=4,
        repeat_update=1
    ):
        """
        Initialize the parent agent, then resolve the memory and store the update settings.

        All arguments not listed below are forwarded unchanged to the parent constructor.

        Args:
            batch_size (int): The batch size used to sample from memory. Should be smaller than memory size.
            memory (Union[dict,Memory]): Dict describing memory via `type` (e.g. `replay`) and `capacity`.
                Alternatively, an actual Memory object can be passed in directly. If None, a
                `replay` memory spec with capacity 100000 is used.
            first_update (int): At which time step the first update is performed. Should be larger
                than batch size.
            update_frequency (int): Number of `observe` steps to perform until an update is executed.
            repeat_update (int): How many update steps are performed per update, where each update step implies
                sampling a batch from the memory and passing it to the model.
        """
        parent_kwargs = dict(
            states_spec=states_spec,
            actions_spec=actions_spec,
            batched_observe=batched_observe,
            scope=scope,
            # parameters specific to LearningAgent
            summary_spec=summary_spec,
            network_spec=network_spec,
            discount=discount,
            device=device,
            session_config=session_config,
            saver_spec=saver_spec,
            distributed_spec=distributed_spec,
            optimizer=optimizer,
            variable_noise=variable_noise,
            states_preprocessing_spec=states_preprocessing_spec,
            explorations_spec=explorations_spec,
            reward_preprocessing_spec=reward_preprocessing_spec,
            distributions_spec=distributions_spec,
            entropy_regularization=entropy_regularization
        )
        super(MemoryAgent, self).__init__(**parent_kwargs)

        if not isinstance(memory, Memory):
            # A spec (or nothing) was given: fall back to the default replay spec when
            # `memory` is None, then build the actual Memory object from the spec.
            self.memory_spec = dict(type='replay', capacity=100000) if memory is None else memory
            memory_kwargs = {
                'states_spec': self.states_spec,
                'actions_spec': self.actions_spec,
            }
            self.memory = Memory.from_spec(spec=self.memory_spec, kwargs=memory_kwargs)
        else:
            # A ready-made Memory object was given: use it as is, there is no spec to keep.
            self.memory = memory
            self.memory_spec = None

        self.batch_size = batch_size
        self.first_update = first_update
        self.update_frequency = update_frequency
        self.repeat_update = repeat_update