Example #1
    def get_output_spec(self, input_spec):
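        # A reshape is only valid if it preserves the total number of elements.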
        if util.product(xs=input_spec['shape']) != util.product(xs=self.shape):
            raise TensorforceError.value(name='Reshape', argument='shape', value=self.shape)

        input_spec['shape'] = self.shape

        return input_spec
Example #2
    def tf_kl_divergence(self,
                         states,
                         internals,
                         auxiliaries,
                         other=None,
                         reduced=True,
                         include_per_action=False):
        kl_divergences = self.kl_divergences(states=states,
                                             internals=internals,
                                             auxiliaries=auxiliaries,
                                             other=other)

        for name, spec, kl_divergence in util.zip_items(
                self.actions_spec, kl_divergences):
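            # Flatten each per-action KL to shape (batch, action-size) before concatenating.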
            kl_divergences[name] = tf.reshape(
                tensor=kl_divergence,
                shape=(-1, util.product(xs=spec['shape'])))

        kl_divergence = tf.concat(values=tuple(kl_divergences.values()),
                                  axis=1)
        if reduced:
            kl_divergence = tf.math.reduce_sum(input_tensor=kl_divergence,
                                               axis=1)

        if include_per_action:
            kl_divergences['*'] = kl_divergence
            return kl_divergences
        else:
            return kl_divergence
Example #3
    def tf_sample_actions(self, states, internals, auxiliaries, deterministic, return_internals):
        if return_internals:
            embedding, internals = self.network.apply(
                x=states, internals=internals, return_internals=return_internals
            )
        else:
            embedding = self.network.apply(
                x=states, internals=internals, return_internals=return_internals
            )

        Module.update_tensor(name=self.name, tensor=embedding)

        actions = OrderedDict()
        for name, spec, distribution in util.zip_items(self.actions_spec, self.distributions):
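            # Integer actions carry a boolean mask of the currently valid choices.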
            if spec['type'] == 'int':
                mask = auxiliaries[name + '_mask']
                parameters = distribution.parametrize(x=embedding, mask=mask)
            else:
                parameters = distribution.parametrize(x=embedding)
            action = distribution.sample(parameters=parameters, deterministic=deterministic)

            entropy = distribution.entropy(parameters=parameters)
            entropy = tf.reshape(tensor=entropy, shape=(-1, util.product(xs=spec['shape'])))
            mean_entropy = tf.reduce_mean(input_tensor=entropy, axis=1)
            actions[name] = self.add_summary(
                label='entropy', name=(name + '-entropy'), tensor=mean_entropy, pass_tensors=action
            )

        if return_internals:
            return actions, internals
        else:
            return actions
Example #4
    def tf_actions_value(self,
                         states,
                         internals,
                         auxiliaries,
                         actions,
                         reduced=True,
                         include_per_action=False):
        actions_values = self.actions_values(states=states,
                                             internals=internals,
                                             auxiliaries=auxiliaries,
                                             actions=actions)

        for name, spec, actions_value in util.zip_items(
                self.actions_spec, actions_values):
            actions_values[name] = tf.reshape(
                tensor=actions_value,
                shape=(-1, util.product(xs=spec['shape'])))

        actions_value = tf.concat(values=tuple(actions_values.values()),
                                  axis=1)
        if reduced:
            actions_value = tf.math.reduce_mean(input_tensor=actions_value,
                                                axis=1)
            if include_per_action:
                for name in self.actions_spec:
                    actions_values[name] = tf.math.reduce_mean(
                        input_tensor=actions_values[name], axis=1)

        if include_per_action:
            actions_values['*'] = actions_value
            return actions_values
        else:
            return actions_value
Example #5
    def __init__(self,
                 name,
                 action_spec,
                 embedding_shape,
                 summary_labels=None):
        super().__init__(name=name,
                         action_spec=action_spec,
                         embedding_shape=embedding_shape,
                         summary_labels=summary_labels)

        input_spec = dict(type='float', shape=self.embedding_shape)

        if len(self.embedding_shape) == 1:
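            # Rank-1 embedding: a single linear layer outputs all flattened alpha/beta parameters.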
            action_size = util.product(xs=self.action_spec['shape'], empty=0)
            self.alpha = self.add_module(name='alpha',
                                         module='linear',
                                         modules=layer_modules,
                                         size=action_size,
                                         input_spec=input_spec)
            self.beta = self.add_module(name='beta',
                                        module='linear',
                                        modules=layer_modules,
                                        size=action_size,
                                        input_spec=input_spec)

        else:
            if len(self.embedding_shape) < 1 or len(self.embedding_shape) > 3:
                raise TensorforceError.value(name=name,
                                             argument='embedding_shape',
                                             value=self.embedding_shape,
                                             hint='invalid rank')
            if self.embedding_shape[:-1] == self.action_spec['shape'][:-1]:
                size = self.action_spec['shape'][-1]
            elif self.embedding_shape[:-1] == self.action_spec['shape']:
                size = 0
            else:
                raise TensorforceError.value(
                    name=name,
                    argument='embedding_shape',
                    value=self.embedding_shape,
                    hint='not flattened and incompatible with action shape')
            self.alpha = self.add_module(name='alpha',
                                         module='linear',
                                         modules=layer_modules,
                                         size=size,
                                         input_spec=input_spec)
            self.beta = self.add_module(name='beta',
                                        module='linear',
                                        modules=layer_modules,
                                        size=size,
                                        input_spec=input_spec)

        Module.register_tensor(name=(self.name + '-alpha'),
                               spec=dict(type='float',
                                         shape=self.action_spec['shape']),
                               batched=True)
        Module.register_tensor(name=(self.name + '-beta'),
                               spec=dict(type='float',
                                         shape=self.action_spec['shape']),
                               batched=True)
Example #6
    def tf_entropy(self,
                   states,
                   internals,
                   auxiliaries,
                   reduced=True,
                   include_per_action=False):
        entropies = self.entropies(states=states,
                                   internals=internals,
                                   auxiliaries=auxiliaries)

        for name, spec, entropy in util.zip_items(self.actions_spec,
                                                  entropies):
            entropies[name] = tf.reshape(
                tensor=entropy, shape=(-1, util.product(xs=spec['shape'])))

        entropy = tf.concat(values=tuple(entropies.values()), axis=1)

        if reduced:
            entropy = tf.math.reduce_mean(input_tensor=entropy, axis=1)
            if include_per_action:
                for name in self.actions_spec:
                    entropies[name] = tf.math.reduce_mean(
                        input_tensor=entropies[name], axis=1)

        if include_per_action:
            entropies['*'] = entropy
            return entropies
        else:
            return entropy
Example #7
    def tf_regularize(self, states, internals):
        regularization_loss = super().tf_regularize(states=states, internals=internals)

        entropies = list()
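        # Collect per-action entropies; their mean forms the entropy bonus subtracted below.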
        embedding = self.network.apply(x=states, internals=internals)
        for name, distribution in self.distributions.items():
            parameters = distribution.parametrize(x=embedding)
            entropy = distribution.entropy(parameters=parameters)
            collapsed_size = util.product(xs=util.shape(entropy)[1:])
            entropy = tf.reshape(tensor=entropy, shape=(-1, collapsed_size))
            entropies.append(entropy)

        entropies = tf.concat(values=entropies, axis=1)
        entropy_per_instance = tf.reduce_mean(input_tensor=entropies, axis=1)
        entropy = tf.reduce_mean(input_tensor=entropy_per_instance, axis=0)
        # entropy = self.add_summary(label='entropy', name='entropy', tensor=entropy)

        entropy_regularization = self.entropy_regularization.value()

        regularization_loss = regularization_loss - entropy_regularization * entropy

        # def no_entropy_reg():
        #     return regularization_loss

        # def apply_entropy_reg():
        #     # ...
        #     return regularization_loss - entropy_regularization * entropy

        # zero = tf.constant(value=0.0, dtype=util.tf_dtype(dtype='float'))
        # skip_entropy_reg = tf.math.equal(x=entropy_regularization, y=zero)
        # regularization_loss = self.cond(pred=skip_entropy_reg, true_fn=no_entropy_reg, false_fn=apply_entropy_reg)

        return regularization_loss
Example #8
    def tf_loss_per_instance(
        self, states, internals, actions, terminal, reward, next_states, next_internals,
        reference=None
    ):
        embedding = self.network.apply(x=states, internals=internals)

        log_probs = list()
        for name, distribution, action in util.zip_items(self.distributions, actions):
            parameters = distribution.parametrize(x=embedding)
            log_prob = distribution.log_probability(parameters=parameters, action=action)
            collapsed_size = util.product(xs=util.shape(log_prob)[1:])
            log_prob = tf.reshape(tensor=log_prob, shape=(-1, collapsed_size))
            log_probs.append(log_prob)

        log_probs = tf.concat(values=log_probs, axis=1)
        if reference is None:
            old_log_probs = tf.stop_gradient(input=log_probs)
        else:
            old_log_probs = reference

        # When reference is None the ratio equals 1.0 in value, but exp(log_probs - stop_gradient(log_probs))
        # still carries the gradient of log_probs, which is what the surrogate objective needs.
        prob_ratios = tf.exp(x=(log_probs - old_log_probs))
        prob_ratio_per_instance = tf.reduce_mean(input_tensor=prob_ratios, axis=1)

        likelihood_ratio_clipping = self.likelihood_ratio_clipping.value()

        clipped_prob_ratio_per_instance = tf.clip_by_value(
            t=prob_ratio_per_instance,
            clip_value_min=(1.0 / (1.0 + likelihood_ratio_clipping)),
            clip_value_max=(1.0 + likelihood_ratio_clipping)
        )
        return -tf.minimum(
            x=(prob_ratio_per_instance * reward),
            y=(clipped_prob_ratio_per_instance * reward)
        )
Example #9
    def __init__(self, name, action_spec, embedding_size, summary_labels=None):
        super().__init__(name=name,
                         action_spec=action_spec,
                         embedding_size=embedding_size,
                         summary_labels=summary_labels)

        action_size = util.product(xs=self.action_spec['shape'], empty=0)
        input_spec = dict(type='float', shape=(self.embedding_size, ))
        self.mean = self.add_module(name='mean',
                                    module='linear',
                                    modules=layer_modules,
                                    size=action_size,
                                    input_spec=input_spec)
        self.log_stddev = self.add_module(name='log-stddev',
                                          module='linear',
                                          modules=layer_modules,
                                          size=action_size,
                                          input_spec=input_spec)

        Module.register_tensor(name=(self.name + '-mean'),
                               spec=dict(type='float',
                                         shape=self.action_spec['shape']),
                               batched=True)
        Module.register_tensor(name=(self.name + '-stddev'),
                               spec=dict(type='float',
                                         shape=self.action_spec['shape']),
                               batched=True)
Example #10
    def __init__(self,
                 name,
                 action_spec,
                 embedding_size,
                 infer_states_value=True,
                 summary_labels=None):
        super().__init__(name=name,
                         action_spec=action_spec,
                         embedding_size=embedding_size,
                         summary_labels=summary_labels)
        shape = self.action_spec['shape']
        num_values = self.action_spec['num_values']
        action_size = util.product(xs=shape)
        input_spec = dict(type='float', shape=(self.embedding_size, ))
        self.deviations = self.add_module(name='deviations',
                                          module='linear',
                                          modules=layer_modules,
                                          size=(action_size * num_values),
                                          input_spec=input_spec)
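        # infer_states_value: derive the state value from the action values instead of a separate linear layer.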
        if infer_states_value:
            self.value = None
        else:
            self.value = self.add_module(name='value',
                                         module='linear',
                                         modules=layer_modules,
                                         size=action_size,
                                         input_spec=input_spec)

        Module.register_tensor(name=(self.name + '-probabilities'),
                               spec=dict(type='float',
                                         shape=(shape + (num_values, ))),
                               batched=True)
Example #11
    def tf_log_probability(self,
                           states,
                           internals,
                           auxiliaries,
                           actions,
                           reduced=True,
                           include_per_action=False):
        log_probabilities = self.log_probabilities(states=states,
                                                   internals=internals,
                                                   auxiliaries=auxiliaries,
                                                   actions=actions)

        for name, spec, log_probability in util.zip_items(
                self.actions_spec, log_probabilities):
            log_probabilities[name] = tf.reshape(
                tensor=log_probability,
                shape=(-1, util.product(xs=spec['shape'])))

        log_probability = tf.concat(values=tuple(log_probabilities.values()),
                                    axis=1)
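        # The joint log-probability factorizes over independent action components, hence the sum when reduced.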
        if reduced:
            log_probability = tf.math.reduce_sum(input_tensor=log_probability,
                                                 axis=1)

        if include_per_action:
            log_probabilities['*'] = log_probability
            return log_probabilities
        else:
            return log_probability
Example #12
    def tf_loss_per_instance(
        self, states, internals, actions, terminal, reward, next_states, next_internals,
        reference=None
    ):
        # Really state value instead of q value?
        # Michael: doubling this function because NAF needs V'(s) not Q'(s), see comment below
        embedding = self.network.apply(x=states, internals=internals)

        # Both networks can use the same internals, could that be a problem?
        # Otherwise need to handle internals indices correctly everywhere
        target_internals = OrderedDict()
        for name, internal in next_internals.items():
            target_internals['target-' + name] = internal
        Module.update_tensors(**target_internals)
        target_embedding = self.target_network.apply(x=next_states, internals=target_internals)

        deltas = list()
        for name in sorted(self.distributions):
            distribution = self.distributions[name]
            target_distribution = self.target_distributions[name]

            parameters = distribution.parametrize(x=embedding)
            target_parameters = target_distribution.parametrize(x=target_embedding)

            q_value = self.tf_q_value(
                embedding=embedding, parameters=parameters, action=actions[name], name=name
            )

            # Notice, this is V', not Q' because NAF outputs V(s) separately
            next_state_value = target_distribution.states_value(parameters=target_parameters)

            delta = self.tf_q_delta(
                q_value=q_value, next_q_value=next_state_value, terminal=terminal, reward=reward
            )

            collapsed_size = util.product(xs=util.shape(delta)[1:])
            delta = tf.reshape(tensor=delta, shape=(-1, collapsed_size))

            deltas.append(delta)

        # Surrogate loss as the mean squared error between actual observed rewards and expected rewards
        loss_per_instance = tf.reduce_mean(input_tensor=tf.concat(values=deltas, axis=1), axis=1)

        # Optional Huber loss
        huber_loss = self.huber_loss.value()

        def no_huber_loss():
            return tf.square(x=loss_per_instance)

        def apply_huber_loss():
            return tf.where(
                condition=(tf.abs(x=loss_per_instance) <= huber_loss),
                x=(0.5 * tf.square(x=loss_per_instance)),
                y=(huber_loss * (tf.abs(x=loss_per_instance) - 0.5 * huber_loss))
            )

        zero = tf.constant(value=0.0, dtype=util.tf_dtype(dtype='float'))
        skip_huber_loss = tf.math.equal(x=huber_loss, y=zero)
        return self.cond(pred=skip_huber_loss, true_fn=no_huber_loss, false_fn=apply_huber_loss)
Example #13
    def output_spec(self):
        output_spec = super().output_spec()

        if output_spec.size != util.product(xs=self.shape):
            raise TensorforceError.value(name='Reshape', argument='shape', value=self.shape)
        output_spec.shape = self.shape

        return output_spec
Example #14
    def get_output_spec(self, input_spec):
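        # 'concat' flattens the full input shape; the other reductions collapse all but the last axis.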
        if self.reduction == 'concat':
            input_spec['shape'] = (util.product(xs=input_spec['shape']), )
        elif self.reduction in ('max', 'mean', 'product', 'sum'):
            input_spec['shape'] = (input_spec['shape'][-1], )
        input_spec.pop('min_value', None)
        input_spec.pop('max_value', None)

        return input_spec
Example #15
    def tf_optimization(
        self, states, internals, actions, terminal, reward, next_states=None, next_internals=None
    ):
        """
        Creates the TensorFlow operations for performing an optimization update step based
        on the given input states and actions batch.

        Args:
            states: Dict of state tensors.
            internals: List of prior internal state tensors.
            actions: Dict of action tensors.
            terminal: Terminal boolean tensor.
            reward: Reward tensor.
            next_states: Dict of successor state tensors.
            next_internals: List of posterior internal state tensors.

        Returns:
            The optimization operation.
        """
        parameters_before = OrderedDict()
        embedding = self.network.apply(x=states, internals=internals)
        for name, distribution in self.distributions.items():
            parameters_before[name] = distribution.parametrize(x=embedding)

        with tf.control_dependencies(control_inputs=util.flatten(xs=parameters_before)):
            optimized = super().tf_optimization(
                states=states, internals=internals, actions=actions, terminal=terminal,
                reward=reward, next_states=next_states, next_internals=next_internals
            )

        with tf.control_dependencies(control_inputs=(optimized,)):
            summaries = list()
            embedding = self.network.apply(x=states, internals=internals)
            for name, distribution in self.distributions.items():
                parameters = distribution.parametrize(x=embedding)
                kl_divergence = distribution.kl_divergence(
                    parameters1=parameters_before[name], parameters2=parameters
                )
                collapsed_size = util.product(xs=util.shape(kl_divergence)[1:])
                kl_divergence = tf.reshape(tensor=kl_divergence, shape=(-1, collapsed_size))
                kl_divergence = tf.reduce_mean(input_tensor=kl_divergence, axis=1)
                kl_divergence = self.add_summary(
                    label='kl-divergence', name=(name + '-kldiv'), tensor=kl_divergence
                )
                summaries.append(kl_divergence)

                entropy = distribution.entropy(parameters=parameters)
                entropy = tf.reshape(tensor=entropy, shape=(-1, collapsed_size))
                entropy = tf.reduce_mean(input_tensor=entropy, axis=1)
                entropy = self.add_summary(
                    label='entropy', name=(name + '-entropy'), tensor=entropy
                )
                summaries.append(entropy)

        with tf.control_dependencies(control_inputs=summaries):
            return util.no_operation()
Example #16
def embedding(input, indices, size, name='embs'):
    with tf.compat.v1.variable_scope(name):
        shape = (indices, size)
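        # Glorot-style initialization scale, sqrt(2 / (fan_in + fan_out)), capped at 0.1.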
        stddev = min(0.1, sqrt(2.0 / (util.product(xs=shape[:-1]) + shape[-1])))
        initializer = tf.random.normal(shape=shape, stddev=stddev, dtype=tf.float32)
        W = tf.Variable(
            initial_value=initializer, trainable=True, validate_shape=True, name='W',
            dtype=tf.float32, shape=shape
        )
        return tf.nn.tanh(tf.compat.v1.nn.embedding_lookup(params=W, ids=input, max_norm=None))
Example #17
    def __init__(self,
                 name,
                 action_spec,
                 embedding_shape,
                 summary_labels=None):
        super().__init__(name=name,
                         action_spec=action_spec,
                         embedding_shape=embedding_shape,
                         summary_labels=summary_labels)

        input_spec = dict(type='float', shape=self.embedding_shape)

        if len(self.embedding_shape) == 1:
            action_size = util.product(xs=self.action_spec['shape'], empty=0)
            self.mean = self.add_module(name='mean',
                                        module='linear',
                                        modules=layer_modules,
                                        size=action_size,
                                        input_spec=input_spec)
            self.log_stddev = self.add_module(name='log-stddev',
                                              module='linear',
                                              modules=layer_modules,
                                              size=action_size,
                                              input_spec=input_spec)

        else:
            if len(self.embedding_shape) < 1 or len(self.embedding_shape) > 3:
                raise TensorforceError.unexpected()
            if self.embedding_shape[:-1] == self.action_spec['shape'][:-1]:
                size = self.action_spec['shape'][-1]
            elif self.embedding_shape[:-1] == self.action_spec['shape']:
                size = 0
            else:
                raise TensorforceError.unexpected()
            self.mean = self.add_module(name='mean',
                                        module='linear',
                                        modules=layer_modules,
                                        size=size,
                                        input_spec=input_spec)
            self.log_stddev = self.add_module(name='log-stddev',
                                              module='linear',
                                              modules=layer_modules,
                                              size=size,
                                              input_spec=input_spec)

        Module.register_tensor(name=(self.name + '-mean'),
                               spec=dict(type='float',
                                         shape=self.action_spec['shape']),
                               batched=True)
        Module.register_tensor(name=(self.name + '-stddev'),
                               spec=dict(type='float',
                                         shape=self.action_spec['shape']),
                               batched=True)
Example #18
    def __init__(self, *, name=None, action_spec=None, input_spec=None):
        assert action_spec.type == 'float' and action_spec.min_value is not None and \
            action_spec.max_value is not None

        parameters_spec = TensorsSpec(
            alpha=TensorSpec(type='float', shape=action_spec.shape),
            beta=TensorSpec(type='float', shape=action_spec.shape),
            alpha_beta=TensorSpec(type='float', shape=action_spec.shape),
            log_norm=TensorSpec(type='float', shape=action_spec.shape)
        )
        conditions_spec = TensorsSpec()

        super().__init__(
            name=name, action_spec=action_spec, input_spec=input_spec,
            parameters_spec=parameters_spec, conditions_spec=conditions_spec
        )

        if len(self.input_spec.shape) == 1:
            # Single embedding
            action_size = util.product(xs=self.action_spec.shape, empty=0)
            self.alpha = self.submodule(
                name='alpha', module='linear', modules=layer_modules, size=action_size,
                initialization_scale=0.01, input_spec=self.input_spec
            )
            self.beta = self.submodule(
                name='beta', module='linear', modules=layer_modules, size=action_size,
                initialization_scale=0.01, input_spec=self.input_spec
            )

        else:
            # Embedding per action
            if len(self.input_spec.shape) < 1 or len(self.input_spec.shape) > 3:
                raise TensorforceError.value(
                    name=name, argument='input_spec.shape', value=self.input_spec.shape,
                    hint='invalid rank'
                )
            if self.input_spec.shape[:-1] == self.action_spec.shape[:-1]:
                size = self.action_spec.shape[-1]
            elif self.input_spec.shape[:-1] == self.action_spec.shape:
                size = 0
            else:
                raise TensorforceError.value(
                    name=name, argument='input_spec.shape', value=self.input_spec.shape,
                    hint='not flattened and incompatible with action shape'
                )
            self.alpha = self.submodule(
                name='alpha', module='linear', modules=layer_modules, size=size,
                initialization_scale=0.01, input_spec=self.input_spec
            )
            self.beta = self.submodule(
                name='beta', module='linear', modules=layer_modules, size=size,
                initialization_scale=0.01, input_spec=self.input_spec
            )
Example #19
    def __init__(self, name, action_spec, embedding_size, summary_labels=None):
        super().__init__(name=name,
                         action_spec=action_spec,
                         embedding_size=embedding_size,
                         summary_labels=summary_labels)

        action_size = util.product(xs=self.action_spec['shape'], empty=0)
        input_spec = dict(type='float', shape=(self.embedding_size, ))
        self.logit = self.add_module(name='logit',
                                     module='linear',
                                     modules=layer_modules,
                                     size=action_size,
                                     input_spec=input_spec)
Example #20
    def __init__(
        self, name, action_spec, embedding_shape, infer_states_value=True, summary_labels=None
    ):
        super().__init__(
            name=name, action_spec=action_spec, embedding_shape=embedding_shape,
            summary_labels=summary_labels
        )

        input_spec = dict(type='float', shape=self.embedding_shape)
        num_values = self.action_spec['num_values']

        if len(self.embedding_shape) == 1:
            action_size = util.product(xs=self.action_spec['shape'])
            self.deviations = self.add_module(
                name='deviations', module='linear', modules=layer_modules,
                size=(action_size * num_values), input_spec=input_spec
            )
            if infer_states_value:
                self.value = None
            else:
                self.value = self.add_module(
                    name='value', module='linear', modules=layer_modules, size=action_size,
                    input_spec=input_spec
                )

        else:
            if len(self.embedding_shape) < 1 or len(self.embedding_shape) > 3:
                raise TensorforceError.unexpected()
            if self.embedding_shape[:-1] == self.action_spec['shape'][:-1]:
                size = self.action_spec['shape'][-1]
            elif self.embedding_shape[:-1] == self.action_spec['shape']:
                size = 1
            else:
                raise TensorforceError.unexpected()
            self.deviations = self.add_module(
                name='deviations', module='linear', modules=layer_modules,
                size=(size * num_values), input_spec=input_spec
            )
            if infer_states_value:
                self.value = None
            else:
                self.value = self.add_module(
                    name='value', module='linear', modules=layer_modules, size=size,
                    input_spec=input_spec
                )

        Module.register_tensor(
            name=(self.name + '-probabilities'),
            spec=dict(type='float', shape=(self.action_spec['shape'] + (num_values,))),
            batched=True
        )
Example #21
    def __init__(self, *, name=None, action_spec=None, input_spec=None):
        assert action_spec.type == 'bool'

        parameters_spec = TensorsSpec(
            true_logit=TensorSpec(type='float', shape=action_spec.shape),
            false_logit=TensorSpec(type='float', shape=action_spec.shape),
            probability=TensorSpec(type='float', shape=action_spec.shape),
            state_value=TensorSpec(type='float', shape=action_spec.shape))
        conditions_spec = TensorsSpec()

        super().__init__(name=name,
                         action_spec=action_spec,
                         input_spec=input_spec,
                         parameters_spec=parameters_spec,
                         conditions_spec=conditions_spec)

        if len(self.input_spec.shape) == 1:
            # Single embedding
            action_size = util.product(xs=self.action_spec.shape, empty=0)
            self.logit = self.submodule(name='logit',
                                        module='linear',
                                        modules=layer_modules,
                                        size=action_size,
                                        initialization_scale=0.01,
                                        input_spec=self.input_spec)

        else:
            # Embedding per action
            if len(self.input_spec.shape) < 1 or len(self.input_spec.shape) > 3:
                raise TensorforceError.value(name=name,
                                             argument='input_spec.shape',
                                             value=self.input_spec.shape,
                                             hint='invalid rank')
            if self.input_spec.shape[:-1] == self.action_spec.shape[:-1]:
                size = self.action_spec.shape[-1]
            elif self.input_spec.shape[:-1] == self.action_spec.shape:
                size = 0
            else:
                raise TensorforceError.value(
                    name=name,
                    argument='input_spec.shape',
                    value=self.input_spec.shape,
                    hint='not flattened and incompatible with action shape')
            self.logit = self.submodule(name='logit',
                                        module='linear',
                                        modules=layer_modules,
                                        size=size,
                                        initialization_scale=0.01,
                                        input_spec=self.input_spec)
Example #22
    def apply(self, *, x):
        queries = self.query.apply(x=x)
        keys = self.key.apply(x=x)
        values = self.value.apply(x=x)

        if self.input_spec.rank > 2:
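            # Higher-rank input: flatten all leading feature axes so attention runs over a single sequence axis.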
            batch_size = tf_util.cast(x=tf.shape(input=x)[:1], dtype='int')

            flattened_shape = tf_util.constant(
                value=(util.product(xs=self.input_spec.shape[:-1]),
                       self.attention_size),
                dtype='int')
            flattened_shape = tf.concat(values=(batch_size, flattened_shape),
                                        axis=0)
            queries = tf.reshape(tensor=queries, shape=flattened_shape)
            keys = tf.reshape(tensor=keys, shape=flattened_shape)

            flattened_shape = tf_util.constant(
                value=(util.product(xs=self.input_spec.shape[:-1]), self.size),
                dtype='int')
            flattened_shape = tf.concat(values=(batch_size, flattened_shape),
                                        axis=0)
            values = tf.reshape(tensor=values, shape=flattened_shape)

        attention = tf.linalg.matmul(a=queries, b=keys, transpose_b=True)
        attention = attention / tf_util.constant(
            value=np.sqrt(self.attention_size), dtype='float')
        attention = tf.nn.softmax(logits=attention, axis=-1)
        x = tf.linalg.matmul(a=attention, b=values)

        if self.input_spec.rank > 2:
            shape = tf_util.constant(value=self.output_spec().shape,
                                     dtype='int')
            shape = tf.concat(values=(batch_size, shape), axis=0)
            x = tf.reshape(tensor=x, shape=shape)

        return super().apply(x=x)
Example #23
    def tf_reference(
        self, states, internals, actions, terminal, reward, next_states, next_internals
    ):
        embedding = self.network.apply(x=states, internals=internals)

        log_probs = list()
        for name, distribution, action in util.zip_items(self.distributions, actions):
            parameters = distribution.parametrize(x=embedding)
            log_prob = distribution.log_probability(parameters=parameters, action=action)
            collapsed_size = util.product(xs=util.shape(log_prob)[1:])
            log_prob = tf.reshape(tensor=log_prob, shape=(-1, collapsed_size))
            log_probs.append(log_prob)

        log_probs = tf.concat(values=log_probs, axis=1)
        return tf.stop_gradient(input=log_probs)
Example #24
    def tf_entropy(self, states, internals, auxiliaries, mean=True):
        entropies = self.entropies(states=states,
                                   internals=internals,
                                   auxiliaries=auxiliaries)

        for name, spec, entropy in util.zip_items(self.actions_spec,
                                                  entropies):
            entropies[name] = tf.reshape(
                tensor=entropy, shape=(-1, util.product(xs=spec['shape'])))

        entropy = tf.concat(values=tuple(entropies.values()), axis=1)
        if mean:
            entropy = tf.math.reduce_mean(input_tensor=entropy, axis=1)

        return entropy
Example #25
    def __init__(self, name, action_spec, embedding_size, summary_labels=None):
        """
        Categorical distribution.
        """
        super().__init__(name=name,
                         action_spec=action_spec,
                         embedding_size=embedding_size,
                         summary_labels=summary_labels)

        action_size = util.product(
            xs=self.action_spec['shape']) * self.action_spec['num_values']
        input_spec = dict(type='float', shape=(self.embedding_size, ))
        self.logits = self.add_module(name='logits',
                                      module='linear',
                                      modules=layer_modules,
                                      size=action_size,
                                      input_spec=input_spec)
Example #26
    def tf_states_value(self, states, internals, auxiliaries, mean=True):
        states_values = self.states_values(states=states,
                                           internals=internals,
                                           auxiliaries=auxiliaries)

        for name, spec, states_value in util.zip_items(self.actions_spec,
                                                       states_values):
            states_values[name] = tf.reshape(
                tensor=states_value,
                shape=(-1, util.product(xs=spec['shape'])))

        states_value = tf.concat(values=tuple(states_values.values()), axis=1)
        if mean:
            states_value = tf.math.reduce_mean(input_tensor=states_value,
                                               axis=1)

        return states_value
Example #27
    def __init__(
        self,
        # Model
        states, actions, scope, device, saver, summarizer, execution, parallel_interactions,
        buffer_observe, exploration, variable_noise, states_preprocessing, reward_preprocessing,
        # MemoryModel
        update_mode, memory, optimizer, discount,
        # DistributionModel
        network, distributions, entropy_regularization,
        # QModel
        target_sync_frequency, target_update_weight, double_q_model, huber_loss
    ):
        if any(spec['type'] != 'float' or 'min_value' in spec or 'max_value' in spec
               for name, spec in actions.items()):
            raise TensorforceError("Only unconstrained float actions valid for NAFModel.")

        super().__init__(
            # Model
            states=states, actions=actions, scope=scope, device=device, saver=saver,
            summarizer=summarizer, execution=execution,
            parallel_interactions=parallel_interactions, buffer_observe=buffer_observe,
            exploration=exploration, variable_noise=variable_noise,
            states_preprocessing=states_preprocessing, reward_preprocessing=reward_preprocessing,
            # MemoryModel
            update_mode=update_mode, memory=memory, optimizer=optimizer, discount=discount,
            # DistributionModel
            network=network, distributions=distributions,
            entropy_regularization=entropy_regularization,
            # QModel
            target_sync_frequency=target_sync_frequency, target_update_weight=target_update_weight,
            double_q_model=double_q_model, huber_loss=huber_loss
        )

        self.state_values = OrderedDict()
        self.l_entries = OrderedDict()
        embedding_size = self.network.get_output_spec()['shape'][0]
        input_spec = dict(type='float', shape=(embedding_size,))
        for name, action_spec in self.actions_spec.items():
            action_size = util.product(xs=action_spec['shape'])
            self.state_values[name] = self.add_module(
                name=(name + '-state-value'), module='linear', modules=layer_modules,
                size=action_size, input_spec=input_spec
            )
            self.l_entries[name] = self.add_module(
                name=(name + '-l-entries'), module='linear', modules=layer_modules,
                size=action_size, input_spec=input_spec
            )
Example #28
    def __init__(self, name, action_spec, embedding_size, summary_labels=None):
        """
        Gaussian distribution.
        """
        super().__init__(
            name=name, action_spec=action_spec, embedding_size=embedding_size,
            summary_labels=summary_labels
        )

        action_size = util.product(xs=self.action_spec['shape'], empty=0)
        input_spec = dict(type='float', shape=(self.embedding_size,))
        self.mean = self.add_module(
            name='mean', module='linear', modules=layer_modules, size=action_size,
            input_spec=input_spec
        )
        self.log_stddev = self.add_module(
            name='log-stddev', module='linear', modules=layer_modules, size=action_size,
            input_spec=input_spec
        )
Example #29
    def __init__(self, name, action_spec, embedding_shape, summary_labels=None):
        super().__init__(
            name=name, action_spec=action_spec, embedding_shape=embedding_shape,
            summary_labels=summary_labels
        )

        input_spec = dict(type='float', shape=self.embedding_shape)
        num_values = self.action_spec['num_values']

        if len(self.embedding_shape) == 1:
            action_size = util.product(xs=self.action_spec['shape'])
            self.deviations = self.add_module(
                name='deviations', module='linear', modules=layer_modules,
                size=(action_size * num_values), input_spec=input_spec
            )

        else:
            if len(self.embedding_shape) < 1 or len(self.embedding_shape) > 3:
                raise TensorforceError.value(
                    name=name, argument='embedding_shape', value=self.embedding_shape,
                    hint='invalid rank'
                )
            if self.embedding_shape[:-1] == self.action_spec['shape'][:-1]:
                size = self.action_spec['shape'][-1]
            elif self.embedding_shape[:-1] == self.action_spec['shape']:
                size = 1
            else:
                raise TensorforceError.value(
                    name=name, argument='embedding_shape', value=self.embedding_shape,
                    hint='not flattened and incompatible with action shape'
                )
            self.deviations = self.add_module(
                name='deviations', module='linear', modules=layer_modules,
                size=(size * num_values), input_spec=input_spec
            )

        Module.register_tensor(
            name=(self.name + '-probabilities'),
            spec=dict(type='float', shape=(self.action_spec['shape'] + (num_values,))),
            batched=True
        )
Example #30
    def tf_q_value(self, embedding, parameters, action, name):
        num_action = util.product(xs=self.actions_spec[name]['shape'])

        mean, stddev, _ = parameters
        flat_mean = tf.reshape(tensor=mean, shape=(-1, num_action))
        flat_stddev = tf.reshape(tensor=stddev, shape=(-1, num_action))

        # Advantage computation
        # Network outputs entries of lower triangular matrix L
        if self.l_entries[name] is None:
            # Diagonal L: exponentiate for positivity and lift to a diagonal matrix so the
            # batched matmul below receives a rank-3 tensor.
            l_matrix = tf.linalg.diag(diagonal=tf.exp(x=flat_stddev))
        else:
            l_matrix = tf.linalg.diag(diagonal=flat_stddev)

            l_entries = self.l_entries[name].apply(x=embedding)
            l_entries = tf.exp(l_entries)
            offset = 0
            columns = list()
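            # Scatter the flat entries into the off-diagonal triangle, left-padding each stacked slice with zeros.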
            for zeros, size in enumerate(range(num_action - 1, -1, -1), 1):
                column = tf.pad(tensor=l_entries[:, offset: offset + size], paddings=((0, 0), (zeros, 0)))
                columns.append(column)
                offset += size

            l_matrix += tf.stack(values=columns, axis=1)

        # P = LL^T
        p_matrix = tf.matmul(a=l_matrix, b=tf.transpose(a=l_matrix, perm=(0, 2, 1)))
        # A = -0.5 (a - mean)^T P (a - mean)
        flat_action = tf.reshape(tensor=action, shape=(-1, num_action))
        difference = flat_action - flat_mean
        advantage = tf.matmul(a=p_matrix, b=tf.expand_dims(input=difference, axis=2))
        advantage = tf.matmul(a=tf.expand_dims(input=difference, axis=1), b=advantage)
        advantage = tf.squeeze(input=(-advantage / 2.0), axis=2)

        # Q = A + V
        # State-value function
        state_value = self.state_values[name].apply(x=embedding)
        q_value = state_value + advantage

        return tf.reshape(tensor=q_value, shape=((-1,) + self.actions_spec[name]['shape']))