Exemplo n.º 1
0
    def apply(self, *, x):
        """Combine the named input tensors according to ``self.aggregation``.

        Args:
            x: Mapping from tensor name to tensor. When only one tensor is
                configured in ``self.tensors``, its entry is returned as is.

        Returns:
            The concatenated, stacked, multiplied or summed tensor. If
            ``self.aggregation`` is none of ``'concat'``, ``'product'``,
            ``'stack'`` or ``'sum'``, the list of rank-aligned tensors is
            returned unchanged.
        """
        # Nothing to aggregate for a single input.
        if len(self.tensors) == 1:
            return x[self.tensors[0]]

        inputs = list(x.values())
        target_rank = len(self.output_spec().shape)

        # Append trailing singleton axes until each input has ``target_rank``
        # axes (each new axis is inserted at the tensor's current rank).
        aligned = list()
        for tensor in inputs:
            for axis in range(tf_util.rank(x=tensor), target_rank):
                tensor = tf.expand_dims(input=tensor, axis=axis)
            aligned.append(tensor)

        mode = self.aggregation

        if mode == 'concat':
            return tf.concat(values=aligned, axis=(self.axis + 1))

        if mode in ('product', 'stack', 'sum'):
            # All three start from the same stacked tensor; 'product' and
            # 'sum' then reduce the freshly created axis away again.
            stacked = tf.stack(values=aligned, axis=(self.axis + 1))
            if mode == 'product':
                return tf.reduce_prod(input_tensor=stacked,
                                      axis=(self.axis + 1))
            if mode == 'sum':
                return tf.reduce_sum(input_tensor=stacked,
                                     axis=(self.axis + 1))
            return stacked

        return aligned
Exemplo n.º 2
0
    def log_probability(self, *, parameters, action):
        """Select, for every action entry, the matching logit.

        Args:
            parameters: Mapping that provides the ``'logits'`` tensor.
            action: Tensor of indices (assumed integer-valued — TODO confirm).
                All of its axes are treated as batch axes of the lookup.

        Returns:
            Tensor with one gathered logit per entry of ``action``.
        """
        logits = parameters['logits']
        num_batch_axes = tf_util.rank(x=action)

        # Give each action a trailing length-one index axis so that gather
        # picks exactly one value per batch position.
        indices = tf.expand_dims(input=action, axis=num_batch_axes)
        selected = tf.gather(params=logits,
                             indices=indices,
                             batch_dims=num_batch_axes)

        # Drop the length-one axis introduced above.
        return tf.squeeze(input=selected, axis=num_batch_axes)
Exemplo n.º 3
0
 def fn_summary():
     """Return the moments of every variable as one flat list.

     For each entry of ``variables`` (taken from the enclosing scope) the
     values returned by ``tf.nn.moments`` over all of its axes are appended
     in order.
     """
     return [
         moment
         for variable in variables
         for moment in tf.nn.moments(
             x=variable, axes=list(range(tf_util.rank(x=variable))))
     ]
Exemplo n.º 4
0
    def action_value(self, *, parameters, action):
        """Select, for every action entry, the matching action value.

        Args:
            parameters: Mapping that provides the ``'action_values'`` tensor.
            action: Tensor of indices (assumed integer-valued — TODO confirm).
                All of its axes are treated as batch axes of the lookup.

        Returns:
            Tensor with one gathered action value per entry of ``action``.
        """
        values = parameters['action_values']
        num_batch_axes = tf_util.rank(x=action)

        # Give each action a trailing length-one index axis so that gather
        # picks exactly one value per batch position.
        indices = tf.expand_dims(input=action, axis=num_batch_axes)
        selected = tf.gather(params=values,
                             indices=indices,
                             batch_dims=num_batch_axes)

        # Drop the length-one axis introduced above.
        return tf.squeeze(input=selected, axis=num_batch_axes)
Exemplo n.º 5
0
            def body(indices, remaining, current_x, current_internals):
                """Run one iterative step and advance the loop state.

                ``x``, ``zeros``, ``ones``, ``internals_signature`` and
                ``self`` are taken from the enclosing scope, which is not
                visible here.

                Args:
                    indices: Indices used to gather the current inputs from
                        the enclosing ``x``.
                    remaining: Counter tensor; entries equal to ``zeros`` are
                        treated as finished.
                    current_x: Value on entry is not used; it is replaced
                        immediately by the gathered inputs.
                    current_internals: Internals in argument form, converted
                        via ``internals_signature.args_to_kwargs``.

                Returns:
                    Tuple ``(indices, remaining, next_x, next_internals)``,
                    with ``next_internals`` converted back to argument form.
                """
                current_internals = internals_signature.args_to_kwargs(
                    args=current_internals)
                # The incoming current_x is discarded in favour of a fresh gather.
                current_x = tf.gather(params=x, indices=indices)
                next_x, next_internals = self.iterative_apply(
                    x=current_x, internals=current_internals)

                # Ops created below only run after current_x and next_x.
                with tf.control_dependencies(control_inputs=(current_x,
                                                             next_x)):
                    is_finished = tf.math.equal(x=remaining, y=zeros)
                    if isinstance(next_internals, dict):
                        for name, current_internal, next_internal in current_internals.zip_items(
                                next_internals):
                            # Insert axes at position 1 until the condition
                            # has the same rank as the internal state.
                            condition = is_finished
                            for _ in range(
                                    tf_util.rank(x=current_internal) - 1):
                                condition = tf.expand_dims(input=condition,
                                                           axis=1)
                            # Finished entries keep their current internal
                            # state, the others take the new one.
                            next_internals[name] = tf.where(
                                condition=condition,
                                x=current_internal,
                                y=next_internal)

                    else:
                        # Same selection for a single (non-dict) internals value.
                        condition = is_finished
                        for _ in range(tf_util.rank(x=current_internals) - 1):
                            condition = tf.expand_dims(input=condition, axis=1)
                        next_internals = tf.where(condition=condition,
                                                  x=current_internals,
                                                  y=next_internals)

                    # Subtract `ones` where not finished, `zeros` otherwise
                    # (presumably a decrement by one — zeros/ones are defined
                    # outside this view).
                    remaining -= tf.where(condition=is_finished,
                                          x=zeros,
                                          y=ones)
                    # Uses the already-updated `remaining`: indices advance
                    # only where entries are still non-zero after the update.
                    indices += tf.where(condition=tf.math.equal(x=remaining,
                                                                y=zeros),
                                        x=zeros,
                                        y=ones)

                next_internals = internals_signature.kwargs_to_args(
                    kwargs=next_internals)
                return indices, remaining, next_x, next_internals
Exemplo n.º 6
0
    def apply(self, *, x):
        """Reduce ``x`` according to ``self.reduction``.

        Args:
            x: Input tensor.

        Returns:
            For ``'concat'``, ``x`` reshaped to
            ``(-1, self.output_spec().size)``. For ``'max'``, ``'mean'``,
            ``'product'`` and ``'sum'``, ``x`` after reducing axis 1
            ``rank(x) - 2`` times with the matching reduction. ``None`` for
            any other reduction value.
        """
        kind = self.reduction

        if kind == 'concat':
            return tf.reshape(tensor=x, shape=(-1, self.output_spec().size))

        if kind not in ('max', 'mean', 'product', 'sum'):
            return None

        reduce_fn = {
            'max': tf.reduce_max,
            'mean': tf.reduce_mean,
            'product': tf.reduce_prod,
            'sum': tf.reduce_sum,
        }[kind]

        # The number of passes is fixed by the rank of the original input;
        # each pass removes axis 1.
        for _ in range(tf_util.rank(x=x) - 2):
            x = reduce_fn(input_tensor=x, axis=1)
        return x
Exemplo n.º 7
0
    def iterative_body(self, x, indices, remaining, current_x,
                       current_internals):
        """Run one iterative step and advance the loop state.

        Args:
            x: Tensor from which the current inputs are gathered; passed
                through unchanged in the result.
            indices: Indices used to gather the current inputs from ``x``.
            remaining: Counter tensor; entries equal to zero are treated as
                finished.
            current_x: Only its leading dimension is read (to size the
                zero/one helper tensors); its values are replaced by the
                gathered inputs.
            current_internals: Internal state, either a dict-like object
                offering ``zip_items`` or a single tensor.

        Returns:
            Tuple ``(x, indices, remaining, next_x, next_internals)``.
        """
        # One-element shape holding the size of the leading axis of current_x.
        batch_shape = tf_util.cast(x=tf.shape(input=current_x)[:1],
                                   dtype='int')
        zeros = tf_util.zeros(shape=batch_shape, dtype='int')
        ones = tf_util.ones(shape=batch_shape, dtype='int')

        current_x = tf.gather(params=x, indices=indices)
        next_x, next_internals = self.iterative_apply(
            x=current_x, internals=current_internals)

        # Ops created below only run after current_x and next_x.
        with tf.control_dependencies(control_inputs=(current_x, next_x)):
            is_finished = tf.math.equal(x=remaining, y=zeros)
            if isinstance(next_internals, dict):
                for name, current_internal, next_internal in current_internals.zip_items(
                        next_internals):
                    # Insert axes at position 1 until the condition has the
                    # same rank as the internal state.
                    condition = is_finished
                    for _ in range(tf_util.rank(x=current_internal) - 1):
                        condition = tf.expand_dims(input=condition, axis=1)
                    # Finished entries keep their current internal state.
                    next_internals[name] = tf.where(condition=condition,
                                                    x=current_internal,
                                                    y=next_internal)

            else:
                condition = is_finished
                for _ in range(tf_util.rank(x=current_internals) - 1):
                    condition = tf.expand_dims(input=condition, axis=1)
                next_internals = tf.where(condition=condition,
                                          x=current_internals,
                                          y=next_internals)

            # Decrement the counter by one for entries that are not finished.
            remaining -= tf.where(condition=is_finished, x=zeros, y=ones)
            # Uses the already-decremented counter: indices advance by one
            # only where entries are still non-zero after this step.
            indices += tf.where(condition=tf.math.equal(x=remaining, y=zeros),
                                x=zeros,
                                y=ones)

        return x, indices, remaining, next_x, next_internals
Exemplo n.º 8
0
    def update(self, *, arguments, variables, **kwargs):
        """Perform an update step and emit the configured summaries.

        Args:
            arguments: Update arguments forwarded to ``self.step``; the
                ``'reward'`` entry is read by the debug assertions.
            variables: Variables to update; all must have a floating dtype.
            **kwargs: Additional arguments forwarded to ``self.step``.

        Returns:
            A boolean ``True`` tensor, created under control dependencies on
            the summary operations recorded by this call.
        """
        assert self.is_initialized_given_variables
        assert all(variable.dtype.is_floating for variable in variables)

        deltas = self.step(arguments=arguments, variables=variables, **kwargs)

        control_inputs = list(deltas)
        if self.config.create_debug_assertions:
            from tensorforce.core.optimizers import DoublecheckStep, NaturalGradient, \
                Synchronization, UpdateModifier

            # Unwrap update modifiers, stopping at a DoublecheckStep if any.
            innermost = self
            while isinstance(innermost, UpdateModifier) and \
                    not isinstance(innermost, DoublecheckStep):
                innermost = innermost.optimizer

            # Configurations for which the non-zero-update check is skipped.
            is_doublecheck = isinstance(innermost, DoublecheckStep)
            positive_only_natgrad = (isinstance(innermost, NaturalGradient)
                                     and innermost.only_positive_updates)
            periodic_sync = (isinstance(self, Synchronization)
                             and self.sync_frequency is not None)

            if not (is_doublecheck or positive_only_natgrad or periodic_sync):
                for delta, variable in zip(deltas, variables):
                    if '_distribution/mean/linear/' in variable.name:
                        # Gaussian.state_value does not use mean
                        continue

                    num_nonzero = tf.math.count_nonzero(
                        input=delta, dtype=tf_util.get_dtype(type='int'))
                    has_nonzero = tf.math.reduce_all(
                        input_tensor=tf.math.greater(
                            x=num_nonzero,
                            y=tf_util.constant(value=0, dtype='int')))
                    reward_all_zero = tf.reduce_all(
                        input_tensor=tf.math.equal(
                            x=arguments['reward'],
                            y=tf_util.constant(value=0.0, dtype='float')))

                    # Either the delta has a non-zero entry or every reward
                    # is zero; the variable name identifies a failure.
                    control_inputs.append(
                        tf.debugging.assert_equal(
                            x=tf.math.logical_or(x=has_nonzero,
                                                 y=reward_all_zero),
                            y=tf_util.constant(value=True, dtype='bool'),
                            message=variable.name))

        with tf.control_dependencies(control_inputs=control_inputs):
            summary_ops = list()

            if self.root.summaries == 'all' or 'update-norm' in self.root.summaries:
                with self.root.summarizer.as_default():
                    float_deltas = [
                        tf_util.cast(x=delta, dtype='float')
                        for delta in deltas
                    ]
                    update_norm = tf.linalg.global_norm(t_list=float_deltas)
                    summary_ops.append(
                        tf.summary.scalar(name='update-norm',
                                          data=update_norm,
                                          step=self.root.updates))

            if self.root.summaries == 'all' or 'updates' in self.root.summaries:
                with self.root.summarizer.as_default():
                    for variable in variables:
                        assert variable.name.startswith(
                            self.root.name + '/') and variable.name[-2:] == ':0'
                        # Strip the "<root>/" prefix and the ":0" suffix.
                        base_name = variable.name[len(self.root.name) + 1:-2]
                        mean_name = base_name + '-mean'
                        variance_name = base_name + '-variance'
                        mean, variance = tf.nn.moments(
                            x=variable,
                            axes=list(range(tf_util.rank(x=variable))))
                        summary_ops.append(
                            tf.summary.scalar(name=mean_name,
                                              data=mean,
                                              step=self.root.updates))
                        summary_ops.append(
                            tf.summary.scalar(name=variance_name,
                                              data=variance,
                                              step=self.root.updates))

        with tf.control_dependencies(control_inputs=summary_ops):
            return tf_util.identity(
                input=tf_util.constant(value=True, dtype='bool'))
Exemplo n.º 9
0
    def update(self, *, arguments, variables, **kwargs):
        """Perform an update step and emit the configured summaries.

        When debug assertions are enabled (and the optimizer configuration is
        not one of the excluded ones), a per-variable "delta is all zero
        although some reward is non-zero" flag is computed for the history
        slot selected by ``self.zero_check_index``, the history is asserted
        to contain no row that is entirely true, and the slot index is then
        advanced cyclically.

        Args:
            arguments: Update arguments forwarded to ``self.step``; the
                ``'reward'`` entry is read by the debug checks.
            variables: Variables to update; all must have a floating dtype.
            **kwargs: Additional arguments forwarded to ``self.step``.

        Returns:
            A boolean ``True`` tensor, created under control dependencies on
            the summary operations recorded by this call.
        """
        assert self.is_initialized_given_variables
        assert all(variable.dtype.is_floating for variable in variables)

        deltas = self.step(arguments=arguments, variables=variables, **kwargs)

        operations = list(deltas)
        if self.config.create_debug_assertions:
            from tensorforce.core.optimizers import DoublecheckStep, NaturalGradient, \
                Synchronization, UpdateModifier
            # Unwrap update modifiers, stopping at a DoublecheckStep if any.
            optimizer = self
            while isinstance(optimizer, UpdateModifier):
                if isinstance(optimizer, DoublecheckStep):
                    break
                optimizer = optimizer.optimizer
            if not isinstance(optimizer, DoublecheckStep) and (
                    not isinstance(optimizer, NaturalGradient)
                    or not optimizer.only_positive_updates) and (
                        not isinstance(self, Synchronization)
                        or self.sync_frequency is None):
                false = tf_util.constant(value=False, dtype='bool')
                zero = tf_util.constant(value=0, dtype='int')
                one = tf_util.constant(value=1, dtype='int')
                zero_float = tf_util.constant(value=0.0, dtype='float')
                # True if at least one reward differs from zero.
                any_reward = tf.reduce_any(input_tensor=tf.math.not_equal(
                    x=arguments['reward'], y=zero_float))
                for index, (delta,
                            variable) in enumerate(zip(deltas, variables)):
                    if '_distribution/mean/linear/' in variable.name:
                        # Gaussian.state_value does not use mean
                        continue
                    # Delta has no non-zero entry although a reward is non-zero.
                    is_zero = tf.math.logical_and(
                        x=tf.math.equal(
                            x=tf.math.count_nonzero(
                                input=delta,
                                dtype=tf_util.get_dtype(type='int')),
                            y=zero),
                        y=any_reward)
                    # Build the [[slot, variable-position]] scatter index.
                    index = tf_util.constant(value=index,
                                             dtype='int',
                                             shape=(1, ))
                    index = tf.stack(values=(tf.expand_dims(
                        input=self.zero_check_index, axis=0), index),
                                     axis=1)
                    # NOTE(review): tf.tensor_scatter_nd_update returns a new
                    # tensor; the result is only used as a control input, so
                    # self.zero_check_history itself does not appear to be
                    # modified here — confirm whether an assign was intended.
                    operations.append(
                        tf.tensor_scatter_nd_update(
                            tensor=self.zero_check_history,
                            indices=index,
                            updates=tf.expand_dims(input=is_zero, axis=0)))

                # Fail if any row of the history (reduced over axis 1) is
                # true in every entry.
                operations.append(
                    tf.debugging.assert_equal(x=tf.math.reduce_any(
                        input_tensor=tf.math.reduce_all(
                            input_tensor=self.zero_check_history, axis=1),
                        axis=0),
                                              y=false))
                with tf.control_dependencies(control_inputs=operations):
                    # Advance the slot index cyclically: (index + 1) mod 3.
                    # The previous expression, mod(1, 3), was constantly 1, so
                    # the index never cycled through the history slots.
                    operations = [
                        self.zero_check_index.assign(value=tf.math.mod(
                            x=(self.zero_check_index + one), y=3))
                    ]

        with tf.control_dependencies(control_inputs=operations):
            dependencies = list()

            if self.root.summaries == 'all' or 'update-norm' in self.root.summaries:
                with self.root.summarizer.as_default():
                    update_norm = tf.linalg.global_norm(t_list=[
                        tf_util.cast(x=delta, dtype='float')
                        for delta in deltas
                    ])
                    dependencies.append(
                        tf.summary.scalar(name='update-norm',
                                          data=update_norm,
                                          step=self.root.updates))

            if self.root.summaries == 'all' or 'updates' in self.root.summaries:
                with self.root.summarizer.as_default():
                    for var in variables:
                        assert var.name[-2] == ':'
                        # Strip the ":N" suffix and, when present, the
                        # "<root>/" prefix from the variable name.
                        if var.name.startswith(self.root.name + '/'):
                            mean_name = var.name[len(self.root.name) +
                                                 1:-2] + '-mean'
                            var_name = var.name[len(self.root.name) +
                                                1:-2] + '-variance'
                        else:
                            mean_name = var.name[:-2] + '-mean'
                            var_name = var.name[:-2] + '-variance'
                        mean, variance = tf.nn.moments(
                            x=var, axes=list(range(tf_util.rank(x=var))))
                        dependencies.append(
                            tf.summary.scalar(name=mean_name,
                                              data=mean,
                                              step=self.root.updates))
                        dependencies.append(
                            tf.summary.scalar(name=var_name,
                                              data=variance,
                                              step=self.root.updates))

        with tf.control_dependencies(control_inputs=dependencies):
            return tf_util.identity(
                input=tf_util.constant(value=True, dtype='bool'))