Пример #1
0
    def api_update(self):
        """
        Builds the API-level update operation.

        Registers the global tensors used while the update graph is built, creates the core
        update operation, and returns the global counters behind a control dependency on it.

        Returns:
            Tuple ``(timestep, episode, update)`` of named identity tensors of the global
            timestep, episode and update counters.
        """
        def flag(value):
            # Boolean constant in the framework's bool dtype
            return tf.constant(value=value, dtype=util.tf_dtype(dtype='bool'))

        # Global tensors for an update call: deterministic, not independent, optimizing
        Module.update_tensors(
            deterministic=flag(True),
            independent=flag(False),
            optimization=flag(True),
            timestep=self.global_timestep,
            episode=self.global_episode,
            update=self.global_update
        )

        # Core update operation which the outputs have to wait for
        updated = self.core_update()

        # Named identity outputs for retrieval, created under the dependency on the update
        with tf.control_dependencies(control_inputs=(updated,)):
            return (
                util.identity_operation(
                    x=self.global_timestep, operation_name='timestep-output'
                ),
                util.identity_operation(
                    x=self.global_episode, operation_name='episode-output'
                ),
                util.identity_operation(
                    x=self.global_update, operation_name='update-output'
                )
            )
Пример #2
0
    def tf_apply(self, x):
        """
        Builds the sequence of the current input and the stored previous inputs.

        While ``has_previous`` is false, the flag is set and the current input is tiled
        ``length`` times along axis ``axis + 1``. Otherwise the current input is concatenated
        to ``previous`` along that axis. ``previous`` is updated before the result is returned.

        Args:
            x: Input tensor.

        Returns:
            Identity of the sequence tensor, created under a control dependency on the update
            of ``previous``.
        """
        def current_input():
            # When concatenating, the input is used as is; otherwise a sequence axis is added
            if self.concatenate:
                return x
            return tf.expand_dims(input=x, axis=(self.axis + 1))

        def first_sequence():
            assignment = self.has_previous.assign(
                value=tf.constant(value=True, dtype=util.tf_dtype(dtype='bool')),
                read_value=False
            )
            with tf.control_dependencies(control_inputs=(assignment, )):
                current = current_input()
                multiples = tuple(
                    self.length if dims == self.axis + 1 else 1
                    for dims in range(util.rank(x=current))
                )
                # Tile the (possibly expanded) current input: multiples has one entry per
                # axis of current, which has one more axis than x when not concatenating
                return tf.tile(input=current, multiples=multiples)

        def later_sequence():
            return tf.concat(
                values=(self.previous, current_input()), axis=(self.axis + 1)
            )

        sequence = self.cond(pred=self.has_previous,
                             true_fn=later_sequence,
                             false_fn=first_sequence)

        # Keep only the most recent (length - 1) entries along axis 0 for the next call
        assignment = self.previous.assign(
            value=tf.concat(values=(self.previous, x), axis=0)[-self.length + 1:],
            read_value=False
        )

        with tf.control_dependencies(control_inputs=(assignment, )):
            return util.identity_operation(x=sequence)
Пример #3
0
    def tf_core_update(self):
        """
        Builds the core update operation.

        Retrieves a batch of memory indices (timestep- or episode-based, depending on
        ``update_unit``), runs the optimization on it and afterwards increments the global
        update counter.

        Returns:
            Boolean ``True`` tensor which depends on the update counter increment.
        """
        Module.update_tensor(name='update', tensor=self.global_update)

        true = tf.constant(value=True, dtype=util.tf_dtype(dtype='bool'))
        one = tf.constant(value=1, dtype=util.tf_dtype(dtype='long'))

        # Batch retrieval
        batch_size = self.update_batch_size.value()
        if self.update_unit == 'episodes':
            # Batch of episodes
            indices = self.memory.retrieve_episodes(n=batch_size)
        elif self.update_unit == 'timesteps':
            # Batch of timesteps, bounded by the larger past horizon of policy and baseline
            policy_horizon = self.policy.past_horizon(is_optimization=True)
            baseline_horizon = self.baseline_policy.past_horizon(is_optimization=True)
            past_horizon = tf.math.maximum(x=policy_horizon, y=baseline_horizon)
            future_horizon = self.estimator.future_horizon()
            indices = self.memory.retrieve_timesteps(
                n=batch_size, past_horizon=past_horizon, future_horizon=future_horizon
            )

        # Optimization on the retrieved batch
        optimized = self.optimize(indices=indices)

        # Update counter is incremented only once the optimization has run
        with tf.control_dependencies(control_inputs=(optimized,)):
            incremented = self.global_update.assign_add(delta=one, read_value=False)

        with tf.control_dependencies(control_inputs=(incremented,)):
            return util.identity_operation(x=true)
Пример #4
0
    def tf_step(self, variables, arguments, fn_loss, **kwargs):
        """
        Creates the operations for one step of the wrapped TensorFlow optimizer.

        Args:
            variables: List of variables to optimize.
            arguments: Dict of arguments passed to fn_loss as **kwargs.
            fn_loss: A callable taking arguments as kwargs and returning the loss op.
            **kwargs: Additional arguments, not used here.

        Returns:
            List of delta tensors, one per variable: the variable after the step minus its
            value captured before the step.
        """
        # Identity of every variable, taken before the step so the delta can be computed
        before = list()
        for variable in variables:
            before.append(util.identity_operation(x=variable))

        # Loss and minimize op are built only after the previous values are captured
        with tf.control_dependencies(control_inputs=before):
            loss = fn_loss(**arguments)
            applied = self.optimizer.minimize(loss=loss, var_list=variables)

        # Deltas are computed after the variables have actually been changed
        with tf.control_dependencies(control_inputs=(applied, )):
            deltas = list()
            for variable, previous in zip(variables, before):
                deltas.append(variable - previous)
            return deltas
        def apply_step():
            """
            Rescales the deltas by the Lagrange multiplier and applies them.

            Uses ``constant``, ``learning_rate``, ``deltas``, ``loss_gradients``, ``variables``
            and ``return_estimated_improvement`` from the enclosing scope.

            Returns:
                The list of estimated deltas, or the tuple (estimated deltas, estimated
                improvement) if ``return_estimated_improvement`` is set.
            """
            # lambda = sqrt(c' / c)
            multiplier = tf.sqrt(x=(constant / learning_rate))

            # delta = delta' / lambda
            estimated_deltas = list()
            for delta in deltas:
                estimated_deltas.append(delta / multiplier)

            # improvement = grad(loss) * delta  (= loss_new - loss_old)
            products = list()
            for grad, delta in zip(loss_gradients, estimated_deltas):
                products.append(tf.reduce_sum(input_tensor=(grad * delta)))
            estimated_improvement = tf.add_n(inputs=products)

            # Apply natural gradient improvement
            applied = self.apply_step(variables=variables, deltas=estimated_deltas)

            # Identity operations tie the returned deltas to the applied step
            with tf.control_dependencies(control_inputs=(applied,)):
                outputs = [util.identity_operation(x=delta) for delta in estimated_deltas]
                if not return_estimated_improvement:
                    return outputs
                return outputs, estimated_improvement
Пример #6
0
    def tf_apply(self, x):
        """
        Replaces the input by its difference to the preceding input along axis 0.

        While ``has_previous`` is false, the flag is set and the first delta is zero.
        Otherwise the stored ``previous`` entry is the predecessor of the first entry of
        ``x``. The last entry of ``x`` is stored as the new ``previous``.

        Args:
            x: Input tensor.

        Returns:
            The delta tensor if ``concatenate`` is ``False``, otherwise ``x`` and the delta
            concatenated along axis ``concatenate + 1``.
        """
        def initial_delta():
            flag_set = self.has_previous.assign(
                value=tf.constant(value=True, dtype=util.tf_dtype(dtype='bool')),
                read_value=False
            )
            with tf.control_dependencies(control_inputs=(flag_set, )):
                leading_zeros = tf.zeros_like(input=x[:1])
                differences = x[1:] - x[:-1]
                return tf.concat(values=(leading_zeros, differences), axis=0)

        def subsequent_delta():
            predecessors = tf.concat(values=(self.previous, x[:-1]), axis=0)
            return x - predecessors

        delta = self.cond(
            pred=self.has_previous, true_fn=subsequent_delta, false_fn=initial_delta
        )

        # Remember the last entry as predecessor for the next call
        stored = self.previous.assign(value=x[-1:], read_value=False)

        with tf.control_dependencies(control_inputs=(stored, )):
            if self.concatenate is not False:
                return tf.concat(values=(x, delta), axis=(self.concatenate + 1))
            return util.identity_operation(x=delta)
Пример #7
0
    def tf_step(self, variables, **kwargs):
        """
        Creates the operations for an optimization step whose deltas are clipped.

        The internal optimizer performs its step first. Each resulting delta is clipped to
        ``[-clipping_value, clipping_value]`` and the difference between the clipped and the
        original delta is applied to the variables afterwards.

        Args:
            variables: List of variables to optimize.
            **kwargs: Additional arguments passed on to the internal optimizer.

        Returns:
            List of clipped delta tensors, one per optimized variable.
        """
        deltas = self.optimizer.step(variables=variables, **kwargs)

        with tf.control_dependencies(control_inputs=deltas):
            limit = self.clipping_value.value()
            clipped_deltas = [
                tf.clip_by_value(t=delta, clip_value_min=-limit, clip_value_max=limit)
                for delta in deltas
            ]
            # Correction which turns the already applied delta into the clipped delta
            corrections = [
                clipped - delta for clipped, delta in zip(clipped_deltas, deltas)
            ]

        applied = self.apply_step(variables=variables, deltas=corrections)

        with tf.control_dependencies(control_inputs=(applied, )):
            outputs = list()
            for clipped in clipped_deltas:
                outputs.append(util.identity_operation(x=clipped))
            return outputs
Пример #8
0
    def tf_apply(self, x):
        """
        Replaces the input by its difference to the preceding input along axis 0.

        Asserts that the first dimension of ``x`` has size one. While ``has_previous`` is
        false, the flag is set and the first delta is zero; otherwise the stored ``previous``
        entry is the predecessor. The last entry of ``x`` is stored as new ``previous``.

        Args:
            x: Input tensor.

        Returns:
            The delta tensor if ``concatenate`` is ``False``, otherwise ``x`` and the delta
            concatenated along axis ``concatenate + 1``.
        """
        leading_size = tf.shape(input=x)[0]
        single_input = tf.debugging.assert_equal(
            x=leading_size,
            y=1,
            message="Deltafier preprocessor currently not compatible with batched Agent.act."
        )

        def initial_delta():
            flag_set = self.has_previous.assign(
                value=tf.constant(value=True, dtype=util.tf_dtype(dtype='bool')),
                read_value=False
            )
            with tf.control_dependencies(control_inputs=(flag_set, )):
                leading_zeros = tf.zeros_like(input=x[:1])
                differences = x[1:] - x[:-1]
                return tf.concat(values=(leading_zeros, differences), axis=0)

        def subsequent_delta():
            predecessors = tf.concat(values=(self.previous, x[:-1]), axis=0)
            return x - predecessors

        with tf.control_dependencies(control_inputs=(single_input, )):
            delta = self.cond(
                pred=self.has_previous, true_fn=subsequent_delta, false_fn=initial_delta
            )

            # Remember the last entry as predecessor for the next call
            stored = self.previous.assign(value=x[-1:], read_value=False)

        with tf.control_dependencies(control_inputs=(stored, )):
            if self.concatenate is not False:
                return tf.concat(values=(x, delta), axis=(self.concatenate + 1))
            return util.identity_operation(x=delta)
Пример #9
0
    def tf_apply(self, x):
        """
        Builds the sequence of the current input and the stored previous inputs.

        Asserts that the first dimension of ``x`` has size one. While ``has_previous`` is
        false, the flag is set and the current input is tiled ``length`` times along axis
        ``axis + 1``; otherwise it is concatenated to ``previous`` along that axis. A slice
        of the resulting sequence is stored as the new ``previous``.

        Args:
            x: Input tensor.

        Returns:
            Identity of the sequence tensor, created under a control dependency on the update
            of ``previous``.
        """
        leading_size = tf.shape(input=x)[0]
        single_input = tf.debugging.assert_equal(
            x=leading_size,
            y=1,
            message="Sequence preprocessor currently not compatible with batched Agent.act."
        )

        def current_input():
            # When concatenating, the input is used as is; otherwise a sequence axis is added
            if self.concatenate:
                return x
            return tf.expand_dims(input=x, axis=(self.axis + 1))

        def first_timestep():
            flag_set = self.has_previous.assign(
                value=tf.constant(value=True, dtype=util.tf_dtype(dtype='bool')),
                read_value=False
            )
            with tf.control_dependencies(control_inputs=(flag_set, )):
                current = current_input()
                multiples = tuple(
                    self.length if dims == self.axis + 1 else 1
                    for dims in range(util.rank(x=current))
                )
                return tf.tile(input=current, multiples=multiples)

        def other_timesteps():
            return tf.concat(
                values=(self.previous, current_input()), axis=(self.axis + 1)
            )

        with tf.control_dependencies(control_inputs=(single_input, )):
            xs = self.cond(
                pred=self.has_previous, true_fn=other_timesteps, false_fn=first_timestep
            )

            # Slice offset along the sequence axis: one input extent when concatenating,
            # one step otherwise; every other axis starts at zero
            if self.concatenate:
                begin = tuple(
                    self.input_spec['shape'][dims - 1] if dims == self.axis + 1 else 0
                    for dims in range(util.rank(x=xs))
                )
            else:
                begin = tuple(
                    1 if dims == self.axis + 1 else 0 for dims in range(util.rank(x=xs))
                )

            remainder = tf.slice(input_=xs, begin=begin, size=self.previous.shape)
            stored = self.previous.assign(value=remainder, read_value=False)

        with tf.control_dependencies(control_inputs=(stored, )):
            return util.identity_operation(x=xs)
Пример #10
0
        def apply_sync():
            """
            Moves the target variables towards the source variables and records the sync.

            Uses ``source_variables``, ``variables`` and ``timestep`` from the enclosing scope.

            Returns:
                List of applied delta tensors, one per variable.
            """
            update_weight = self.update_weight.value()

            # Weighted difference between each source variable and its target variable
            deltas = [
                update_weight * (source - target)
                for source, target in zip(source_variables, variables)
            ]

            applied = self.apply_step(variables=variables, deltas=deltas)
            last_sync_updated = self.last_sync.assign(value=timestep)

            # Identity operations tie the returned deltas to both updates
            dependencies = (applied, last_sync_updated)
            with tf.control_dependencies(control_inputs=dependencies):
                outputs = list()
                for delta in deltas:
                    outputs.append(util.identity_operation(x=delta))
                return outputs
Пример #11
0
    def tf_apply(self, x, initial=None):
        """
        Applies the layer over the dependency sequences of the input.

        The global tensors 'dependency_starts' and 'dependency_lengths' describe, per batch
        entry, where its sequence starts in ``x`` and how long it is. Each sequence is
        truncated to its last ``dependency_horizon + 1`` steps and then traversed with a
        while loop:

        - ``processing == 'cumulative'``: the gathered steps are stacked along axis 1 and
          passed to ``cumulative_apply``.
        - ``processing == 'iterative'``: ``iterative_step`` is called per step, carrying
          aggregates from step to step.

        For any other ``processing`` value no loop is built and ``None`` is returned.

        Args:
            x: Input tensor from which the sequence steps are gathered.
            initial: Optional initial aggregates for iterative processing. If ``None``,
                ``initial_values()`` is used.

        Returns:
            The result of the parent ``tf_apply``; for iterative processing with ``initial``
            given, the tuple (result, final aggregates).
        """
        # NOTE: zero is only referenced by the commented-out assertions further below
        zero = tf.constant(value=0, dtype=util.tf_dtype(dtype='long'))
        one = tf.constant(value=1, dtype=util.tf_dtype(dtype='long'))
        dependency_starts = Module.retrieve_tensor(name='dependency_starts')
        dependency_lengths = Module.retrieve_tensor(name='dependency_lengths')
        # Batch size in the 'long' dtype: tf.shape can emit it directly only for int32/int64
        if util.tf_dtype(dtype='long') in (tf.int32, tf.int64):
            batch_size = tf.shape(input=dependency_starts,
                                  out_type=util.tf_dtype(dtype='long'))[0]
        else:
            batch_size = tf.dtypes.cast(x=tf.shape(input=dependency_starts)[0],
                                        dtype=util.tf_dtype(dtype='long'))
        zeros = tf.zeros(shape=(batch_size, ),
                         dtype=util.tf_dtype(dtype='long'))
        ones = tf.ones(shape=(batch_size, ), dtype=util.tf_dtype(dtype='long'))
        # maximum_iterations = tf.math.reduce_max(input_tensor=lengths, axis=0)
        horizon = self.dependency_horizon.value() + one  # including 0th step
        # Sequences longer than the horizon are truncated at the front: the start is moved
        # forward and the length reduced by the same excess
        starts = dependency_starts + tf.maximum(
            x=(dependency_lengths - horizon), y=zeros)
        lengths = dependency_lengths - tf.maximum(
            x=(dependency_lengths - horizon), y=zeros)
        # No more loop iterations than the longest remaining sequence requires
        horizon = tf.minimum(x=horizon,
                             y=tf.math.reduce_max(input_tensor=lengths,
                                                  axis=0))

        if self.processing == 'cumulative':

            def body(indices, remaining, xs):
                # Gather the current step of every sequence and append it along axis 1
                current_x = tf.gather(params=x, indices=indices)
                current_x = tf.expand_dims(input=current_x, axis=1)
                xs = tf.concat(values=(xs, current_x), axis=1)
                # Decrement remaining where it is not yet zero
                remaining -= tf.where(condition=tf.math.equal(x=remaining,
                                                              y=zeros),
                                      x=zeros,
                                      y=ones)
                # Advance indices only where the already decremented remaining is non-zero
                indices += tf.where(condition=tf.math.equal(x=remaining,
                                                            y=zeros),
                                    x=zeros,
                                    y=ones)
                return indices, remaining, xs

            # Empty sequence axis (size 0) to which the loop body appends steps
            initial_xs = tf.zeros(
                shape=((batch_size, 0) + self.output_spec['shape']),
                dtype=util.tf_dtype(dtype=self.output_spec['type']))

            # The loop is bounded by maximum_iterations only, the condition is always true
            final_indices, final_remaining, final_xs = self.while_loop(
                cond=util.tf_always_true,
                body=body,
                loop_vars=(starts, lengths, initial_xs),
                back_prop=True,
                maximum_iterations=horizon)

            # initial_xs = tf.gather(params=x, indices=starts)
            # initial_xs = tf.expand_dims(input=initial_xs, axis=1)
            # missing = tf.expand_dims(input=horizon, axis=0) - lengths
            # missing -= tf.where(condition=tf.math.equal(x=missing, y=zeros), x=zeros, y=ones)
            # starts += tf.where(condition=tf.math.equal(x=missing, y=zeros), x=ones, y=zeros)

            # final_indices, final_counter, final_xs = self.while_loop(
            #     cond=util.tf_always_true, body=body, loop_vars=(starts, missing, initial_xs),
            #     back_prop=True, maximum_iterations=(horizon - one)
            # )

        elif self.processing == 'iterative':

            def body(indices, remaining, current_x, current_aggregates):
                # The incoming current_x is discarded, the current step is gathered anew
                current_x = tf.gather(params=x, indices=indices)
                next_x, next_aggregates = self.iterative_step(
                    x=current_x, previous=current_aggregates)
                with tf.control_dependencies(control_inputs=(current_x,
                                                             next_x)):
                    is_finished = tf.math.equal(x=remaining, y=zeros)
                    # Finished sequences keep their current aggregates; the condition is
                    # expanded to the rank of the aggregate for tf.where
                    if isinstance(next_aggregates, dict):
                        for name, current_aggregate, next_aggregate in util.zip_items(
                                current_aggregates, next_aggregates):
                            condition = is_finished
                            for _ in range(util.rank(x=current_aggregate) - 1):
                                condition = tf.expand_dims(input=condition,
                                                           axis=1)
                            next_aggregates[name] = tf.where(
                                condition=condition,
                                x=current_aggregate,
                                y=next_aggregate)
                    else:
                        condition = is_finished
                        for _ in range(util.rank(x=current_aggregates) - 1):
                            condition = tf.expand_dims(input=condition, axis=1)
                        next_aggregates = tf.where(condition=condition,
                                                   x=current_aggregates,
                                                   y=next_aggregates)
                    # Decrement remaining where the sequence was not finished
                    remaining -= tf.where(condition=is_finished,
                                          x=zeros,
                                          y=ones)
                    # Advance indices only where the already decremented remaining is
                    # non-zero
                    indices += tf.where(condition=tf.math.equal(x=remaining,
                                                                y=zeros),
                                        x=zeros,
                                        y=ones)
                return indices, remaining, next_x, next_aggregates

            # Placeholder loop variable for the step output, overwritten by the loop body
            initial_x = tf.zeros(
                shape=((batch_size, ) + self.output_spec['shape']),
                dtype=util.tf_dtype(dtype=self.output_spec['type']))

            if initial is None:
                initial_aggregates = self.initial_values()
            else:
                initial_aggregates = initial

            # The loop is bounded by maximum_iterations only, the condition is always true
            final_indices, final_remaining, final_x, final_aggregates = self.while_loop(
                cond=util.tf_always_true,
                body=body,
                loop_vars=(starts, lengths, initial_x, initial_aggregates),
                back_prop=True,
                maximum_iterations=horizon)

        # assertions = [
        #     tf.debugging.assert_equal(
        #         x=final_indices, y=(tf.math.cumsum(x=dependency_lengths) - ones)
        #     ),
        #     tf.debugging.assert_equal(
        #         x=tf.math.reduce_sum(input_tensor=final_remaining, axis=0), y=zero
        #     )
        # ]

        # with tf.control_dependencies(control_inputs=assertions):
        if self.processing == 'cumulative':
            return super().tf_apply(x=self.cumulative_apply(xs=final_xs))
        elif self.processing == 'iterative':
            # The final aggregates are returned only if the caller supplied initial ones
            if initial is None:
                return util.identity_operation(x=super().tf_apply(x=final_x))
            else:
                return util.identity_operation(x=super().tf_apply(
                    x=final_x)), final_aggregates
Пример #12
0
    def tf_core_update(self):
        """
        Builds the core update operation.

        Increments the global update counter, then retrieves a batch of memory indices
        (timestep- or episode-based, depending on ``update_unit``) and runs the optimization
        on it.

        Returns:
            Boolean ``True`` tensor which depends on the optimization.
        """
        Module.update_tensor(name='update', tensor=self.global_update)
        Module.global_summary_step = 'update'

        true = tf.constant(value=True, dtype=util.tf_dtype(dtype='bool'))
        one = tf.constant(value=1, dtype=util.tf_dtype(dtype='long'))

        # The update counter is incremented before the batch is retrieved
        incremented = self.global_update.assign_add(delta=one, read_value=False)

        with tf.control_dependencies(control_inputs=(incremented, )):
            batch_size = self.update_batch_size.value()

            if self.update_unit == 'episodes':
                # Batch of episodes
                indices = self.memory.retrieve_episodes(n=batch_size)

            elif self.update_unit == 'timesteps':
                # Batch of timesteps, padded by the dependency horizon of the policy (and of
                # the baseline policy, if present) and by the estimator horizon plus one
                past_horizon = self.policy.dependency_horizon(is_optimization=True)
                if self.baseline_policy is not None:
                    baseline_horizon = self.baseline_policy.dependency_horizon(
                        is_optimization=True
                    )
                    past_horizon = tf.math.maximum(x=past_horizon, y=baseline_horizon)
                future_horizon = self.estimator.horizon.value() + one
                indices = self.memory.retrieve_timesteps(
                    n=batch_size, past_padding=past_horizon, future_padding=future_horizon
                )

        # Optimization on the retrieved batch
        optimized = self.optimize(indices=indices)

        with tf.control_dependencies(control_inputs=(optimized, )):
            return util.identity_operation(x=true)
    def tf_step(self, variables, arguments, fn_loss, **kwargs):
        """
        Creates the TensorFlow operations for performing an optimization step.

        For every sample a normally distributed perturbation, scaled by the learning rate,
        is applied to the variables (replacing the previous perturbation), and the
        perturbation is accumulated with the sign of the loss change it causes. The mean
        of the accumulated perturbations is the resulting delta; the last perturbation is
        finally replaced by this delta.

        Args:
            variables: List of variables to optimize.
            arguments: Dict of arguments for callables, like fn_loss.
            fn_loss: A callable returning the loss of the current model.
            **kwargs: Additional arguments, not used.

        Returns:
            List of delta tensors corresponding to the updates for each optimized variable.
        """
        learning_rate = self.learning_rate.value()
        unperturbed_loss = fn_loss(**arguments)

        deltas = [tf.zeros_like(tensor=variable) for variable in variables]
        previous_perturbations = [
            tf.zeros_like(tensor=variable) for variable in variables
        ]

        if self.unroll_loop:
            # Unrolled for loop: the number of samples is a Python loop bound here. It is
            # bound locally since the averaging below reads num_samples in both branches.
            num_samples = self.num_samples
            for _ in range(num_samples):
                with tf.control_dependencies(control_inputs=deltas):
                    perturbations = [
                        tf.random_normal(shape=util.shape(variable)) *
                        learning_rate for variable in variables
                    ]
                    # Applying the difference replaces the previous perturbation by the
                    # new one
                    perturbation_deltas = [
                        pert - prev_pert for pert, prev_pert in zip(
                            perturbations, previous_perturbations)
                    ]
                    applied = self.apply_step(variables=variables,
                                              deltas=perturbation_deltas)
                    previous_perturbations = perturbations

                with tf.control_dependencies(control_inputs=(applied, )):
                    perturbed_loss = fn_loss(**arguments)
                    # +1 if the perturbation lowered the loss, -1 if it raised it
                    direction = tf.sign(x=(unperturbed_loss - perturbed_loss))
                    deltas = [
                        delta + direction * perturbation
                        for delta, perturbation in zip(deltas, perturbations)
                    ]

        else:
            # TensorFlow while loop
            def body(deltas, previous_perturbations):
                with tf.control_dependencies(control_inputs=deltas):
                    perturbations = [
                        tf.random_normal(shape=util.shape(variable)) *
                        learning_rate for variable in variables
                    ]
                    # Applying the difference replaces the previous perturbation by the
                    # new one
                    perturbation_deltas = [
                        pert - prev_pert for pert, prev_pert in zip(
                            perturbations, previous_perturbations)
                    ]
                    applied = self.apply_step(variables=variables,
                                              deltas=perturbation_deltas)

                with tf.control_dependencies(control_inputs=(applied, )):
                    perturbed_loss = fn_loss(**arguments)
                    # +1 if the perturbation lowered the loss, -1 if it raised it
                    direction = tf.sign(x=(unperturbed_loss - perturbed_loss))
                    deltas = [
                        delta + direction * perturbation
                        for delta, perturbation in zip(deltas, perturbations)
                    ]

                return deltas, perturbations

            num_samples = self.num_samples.value()
            # The loop is bounded by maximum_iterations only, the condition is always true
            deltas, perturbations = self.while_loop(
                cond=util.tf_always_true,
                body=body,
                loop_vars=(deltas, previous_perturbations),
                maximum_iterations=num_samples)

        with tf.control_dependencies(control_inputs=deltas):
            # Mean over all samples
            num_samples = tf.dtypes.cast(x=num_samples,
                                         dtype=util.tf_dtype(dtype='float'))
            deltas = [delta / num_samples for delta in deltas]
            # Replace the last perturbation, which is still applied, by the mean delta
            perturbation_deltas = [
                delta - pert for delta, pert in zip(deltas, perturbations)
            ]
            applied = self.apply_step(variables=variables,
                                      deltas=perturbation_deltas)

        with tf.control_dependencies(control_inputs=(applied, )):
            # Trivial operation to enforce control dependency
            return [util.identity_operation(x=delta) for delta in deltas]
Пример #14
0
    def api_experience(self):
        """
        Builds the API-level experience operation.

        Creates type and shape assertions for all inputs, registers the global tensors used
        while the experience graph is built, creates the core experience operation under
        these assertions, and returns the global counters behind a control dependency on it.

        Returns:
            Tuple ``(timestep, episode, update)`` of named identity tensors of the global
            timestep, episode and update counters.
        """
        # Inputs
        states = self.states_input
        internals = self.internals_input
        auxiliaries = self.auxiliaries_input
        actions = self.actions_input
        terminal = self.terminal_input
        reward = self.reward_input

        zero = tf.constant(value=0, dtype=util.tf_dtype(dtype='long'))

        assertions = list()

        # terminal: type and shape
        assertions.append(
            tf.debugging.assert_type(tensor=terminal, tf_type=util.tf_dtype(dtype='long'))
        )
        assertions.append(tf.debugging.assert_rank(x=terminal, rank=1))

        # reward: type and shape
        assertions.append(
            tf.debugging.assert_type(tensor=reward, tf_type=util.tf_dtype(dtype='float'))
        )
        assertions.append(tf.debugging.assert_rank(x=reward, rank=1))

        # shape of terminal equals shape of reward
        assertions.append(
            tf.debugging.assert_equal(x=tf.shape(input=terminal), y=tf.shape(input=reward))
        )

        # buffer index is zero
        assertions.append(
            tf.debugging.assert_equal(
                x=tf.math.reduce_sum(input_tensor=self.buffer_index, axis=0),
                y=tf.constant(value=0, dtype=util.tf_dtype(dtype='long'))
            )
        )

        # at most one terminal
        assertions.append(
            tf.debugging.assert_less_equal(
                x=tf.math.count_nonzero(
                    input_tensor=terminal, dtype=util.tf_dtype(dtype='long')
                ),
                y=tf.constant(value=1, dtype=util.tf_dtype(dtype='long'))
            )
        )

        # if terminal, last timestep in batch
        any_terminal = tf.math.reduce_any(input_tensor=tf.math.greater(x=terminal, y=zero))
        last_terminal = tf.math.greater(x=terminal[-1], y=zero)
        assertions.append(tf.debugging.assert_equal(x=any_terminal, y=last_terminal))

        batch_size = tf.shape(input=terminal)[:1]

        def has_batch_shape(tensor, shape):
            # Asserts that the shape of tensor is the batch size followed by shape
            return tf.debugging.assert_equal(
                x=tf.shape(input=tensor, out_type=tf.int32),
                y=tf.concat(
                    values=(batch_size, tf.constant(value=shape, dtype=tf.int32)), axis=0
                )
            )

        # states: type and shape
        for name, spec in self.states_spec.items():
            assertions.append(
                tf.debugging.assert_type(
                    tensor=states[name], tf_type=util.tf_dtype(dtype=spec['type'])
                )
            )
            shape = self.unprocessed_state_shape.get(name, spec['shape'])
            assertions.append(has_batch_shape(states[name], shape))

        # internals: type and shape
        for name, spec in self.internals_spec.items():
            assertions.append(
                tf.debugging.assert_type(
                    tensor=internals[name], tf_type=util.tf_dtype(dtype=spec['type'])
                )
            )
            shape = spec['shape']
            assertions.append(has_batch_shape(internals[name], shape))

        # action_masks: type and shape, only for actions of type 'int'
        for name, spec in self.actions_spec.items():
            if spec['type'] != 'int':
                continue
            name = name + '_mask'
            assertions.append(
                tf.debugging.assert_type(
                    tensor=auxiliaries[name], tf_type=util.tf_dtype(dtype='bool')
                )
            )
            shape = spec['shape'] + (spec['num_values'], )
            assertions.append(has_batch_shape(auxiliaries[name], shape))

        # actions: type and shape
        for name, spec in self.actions_spec.items():
            assertions.append(
                tf.debugging.assert_type(
                    tensor=actions[name], tf_type=util.tf_dtype(dtype=spec['type'])
                )
            )
            shape = spec['shape']
            assertions.append(has_batch_shape(actions[name], shape))

        # Global tensors for an experience call: deterministic, independent, not optimizing
        Module.update_tensors(
            deterministic=tf.constant(value=True, dtype=util.tf_dtype(dtype='bool')),
            independent=tf.constant(value=True, dtype=util.tf_dtype(dtype='bool')),
            optimization=tf.constant(value=False, dtype=util.tf_dtype(dtype='bool')),
            timestep=self.global_timestep,
            episode=self.global_episode,
            update=self.global_update
        )

        # Core experience operation, created under the input assertions
        with tf.control_dependencies(control_inputs=assertions):
            experienced = self.core_experience(
                states=states, internals=internals, auxiliaries=auxiliaries,
                actions=actions, terminal=terminal, reward=reward
            )

        # Named identity outputs for retrieval, created under the dependency on the experience
        with tf.control_dependencies(control_inputs=(experienced, )):
            return (
                util.identity_operation(
                    x=self.global_timestep, operation_name='timestep-output'
                ),
                util.identity_operation(
                    x=self.global_episode, operation_name='episode-output'
                ),
                util.identity_operation(
                    x=self.global_update, operation_name='update-output'
                )
            )
Пример #15
0
    def add_summary(self,
                    label,
                    name,
                    tensor,
                    pass_tensors=None,
                    return_summaries=False,
                    mean_variance=False,
                    enumerate_last_rank=False):
        """Records TensorFlow summaries for a tensor and passes tensors through.

        Args:
            label: Summary label (string) or iterable of labels. Summaries are only
                recorded if at least one label is contained in `self.summary_labels`.
            name: Summary name (string).
            tensor: Tensor (`tf.Tensor`) to summarize.
            pass_tensors: Tensor or iterable of tensors to return; defaults to `tensor`.
            return_summaries: Accepted for interface compatibility; not read by this method.
            mean_variance: If true, mean and variance over all axes are recorded as
                "<name>-mean" and "<name>-variance" instead of the tensor itself.
            enumerate_last_rank: If true (must be bool), one summary per index of the last
                axis is recorded, named "<name><index>".

        Returns:
            `pass_tensors` unchanged if no summary is recorded (no summary labels, inside a
            'while' scope, or label not logged); otherwise identity operations of
            `pass_tensors` (a tuple if iterable) created under a control dependency on the
            summary operations.

        Raises:
            TensorforceError: if an argument has an invalid type.
        """
        # should be "labels" !!!
        # label
        if util.is_iterable(x=label):
            if not all(isinstance(x, str) for x in label):
                raise TensorforceError.type(name='summary',
                                            argument='label',
                                            value=label)
        else:
            if not isinstance(label, str):
                raise TensorforceError.type(name='summary',
                                            argument='label',
                                            value=label)
        # name
        if not isinstance(name, str):
            raise TensorforceError.type(name='summary',
                                        argument='name',
                                        value=name)
        # tensor
        if not isinstance(tensor, tf.Tensor):
            raise TensorforceError.type(name='summary',
                                        argument='tensor',
                                        value=tensor)
        # pass_tensors
        if util.is_iterable(x=pass_tensors):
            if not all(isinstance(x, tf.Tensor) for x in pass_tensors):
                raise TensorforceError.type(name='summary',
                                            argument='pass_tensors',
                                            value=pass_tensors)
        elif pass_tensors is not None:
            if not isinstance(pass_tensors, tf.Tensor):
                raise TensorforceError.type(name='summary',
                                            argument='pass_tensors',
                                            value=pass_tensors)
        # enumerate_last_rank
        if not isinstance(enumerate_last_rank, bool):
            # Report the offending argument itself (previously reported `tensor`)
            raise TensorforceError.type(name='summary',
                                        argument='enumerate_last_rank',
                                        value=enumerate_last_rank)

        if pass_tensors is None:
            pass_tensors = tensor

        # Check whether summaries are logged
        if self.summary_labels is None:
            return pass_tensors

        # Check whether not in while loop
        if 'while' in Module.global_scope:  # 'cond' in Module.global_scope
            return pass_tensors

        # Check whether given label is logged
        if util.is_iterable(x=label):
            if all(x not in self.summary_labels for x in label):
                return pass_tensors
        else:
            if label not in self.summary_labels:
                return pass_tensors

        # Handle enumerate_last_rank
        if enumerate_last_rank:
            num_dims = util.shape(x=tensor)[-1]
            tensors = OrderedDict([(name + str(n), tensor[..., n])
                                   for n in range(num_dims)])
        else:
            tensors = OrderedDict([(name, tensor)])

        if mean_variance:
            # Replace every entry by its mean/variance pair; iterate over a copy of the
            # keys since the dict is modified inside the loop
            for key in list(tensors):
                value = tensors.pop(key)
                mean, variance = tf.nn.moments(x=value,
                                               axes=tuple(
                                                   range(util.rank(x=value))))
                tensors[key + '-mean'] = mean
                tensors[key + '-variance'] = variance

        # TensorFlow summaries
        summaries = list()
        for summary_name, summary_tensor in tensors.items():
            shape = util.shape(x=summary_tensor)
            if shape == () or shape == (-1, ):
                # Scalar
                summaries.append(
                    tf.contrib.summary.scalar(name=summary_name,
                                              tensor=summary_tensor))
            elif shape == (1, ) or shape == (-1, 1):
                # Single-value tensor as scalar
                summary_tensor = tf.squeeze(input=summary_tensor, axis=-1)
                summaries.append(
                    tf.contrib.summary.scalar(name=summary_name,
                                              tensor=summary_tensor))
            else:
                # General tensor as histogram
                summaries.append(
                    tf.contrib.summary.histogram(name=summary_name,
                                                 tensor=summary_tensor))

        # Returned tensors depend on the summary operations
        with tf.control_dependencies(control_inputs=summaries):
            if util.is_iterable(x=pass_tensors):
                return tuple(
                    util.identity_operation(x=x) for x in pass_tensors)
            else:
                return util.identity_operation(x=pass_tensors)
Пример #16
0
    def api_experience(self):
        """Builds the graph of the experience API function.

        Reads the inputs stored on this module (`self.*_input`), validates their types and
        shapes, applies the configured state/reward preprocessing and calls
        `core_experience`.

        Returns:
            Tuple (timestep, episode, update) of identity operations on the global counters,
            created under a control dependency on the experience operation.
        """
        # Inputs
        states = OrderedDict(self.states_input)
        internals = OrderedDict(self.internals_input)
        auxiliaries = OrderedDict(self.auxiliaries_input)
        actions = OrderedDict(self.actions_input)
        terminal = self.terminal_input
        reward = self.reward_input

        long_zero = tf.constant(value=0, dtype=util.tf_dtype(dtype='long'))
        bool_true = tf.constant(value=True, dtype=util.tf_dtype(dtype='bool'))
        batch_size = tf.shape(input=terminal)[:1]

        def check_type(tensor, dtype, message):
            # Type check; its result is not collected in the assertions list
            tf.debugging.assert_type(
                tensor=tensor, tf_type=util.tf_dtype(dtype=dtype), message=message
            )

        def batched_shape_assertion(tensor, dims, message):
            # Assertion that the tensor shape equals batch_size followed by dims
            expected_tail = tf.constant(value=dims, dtype=util.tf_dtype(dtype='int'))
            actual = tf.shape(input=tensor, out_type=util.tf_dtype(dtype='int'))
            expected = tf.concat(values=(batch_size, expected_tail), axis=0)
            return tf.debugging.assert_equal(x=actual, y=expected, message=message)

        # Assertions
        assertions = list()

        # terminal: type and shape
        check_type(terminal, 'long', "Agent.experience: invalid type for terminal input.")
        terminal_rank = tf.debugging.assert_rank(
            x=terminal, rank=1, message="Agent.experience: invalid shape for terminal input."
        )
        assertions.append(terminal_rank)

        # reward: type and shape
        check_type(reward, 'float', "Agent.experience: invalid type for reward input.")
        reward_rank = tf.debugging.assert_rank(
            x=reward, rank=1, message="Agent.experience: invalid shape for reward input."
        )
        assertions.append(reward_rank)

        # shape of terminal equals shape of reward
        terminal_shape = tf.shape(input=terminal)
        reward_shape = tf.shape(input=reward)
        assertions.append(tf.debugging.assert_equal(
            x=terminal_shape, y=reward_shape,
            message="Agent.experience: incompatible shapes of terminal and reward input."
        ))

        # buffer index is zero
        buffer_total = tf.math.reduce_sum(input_tensor=self.buffer_index, axis=0)
        no_buffered = tf.constant(value=0, dtype=util.tf_dtype(dtype='long'))
        assertions.append(tf.debugging.assert_equal(
            x=buffer_total, y=no_buffered,
            message="Agent.experience: cannot be called mid-episode."
        ))

        # at most one terminal
        num_terminals = tf.math.count_nonzero(
            input=terminal, dtype=util.tf_dtype(dtype='long')
        )
        max_terminals = tf.constant(value=1, dtype=util.tf_dtype(dtype='long'))
        assertions.append(tf.debugging.assert_less_equal(
            x=num_terminals, y=max_terminals,
            message="Agent.experience: input contains more than one terminal."
        ))

        # if terminal, last timestep in batch
        any_terminal = tf.math.reduce_any(
            input_tensor=tf.math.greater(x=terminal, y=long_zero)
        )
        last_terminal = tf.math.greater(x=terminal[-1], y=long_zero)
        assertions.append(tf.debugging.assert_equal(
            x=any_terminal, y=last_terminal,
            message="Agent.experience: terminal is not the last input timestep."
        ))

        # states: type and shape (checked against the unprocessed spec where one exists)
        for name, spec in self.states_spec.items():
            state_spec = self.unprocessed_state_spec.get(name, spec)
            check_type(
                states[name], state_spec['type'],
                "Agent.experience: invalid type for {} state input.".format(name)
            )
            assertions.append(batched_shape_assertion(
                states[name], state_spec['shape'],
                "Agent.experience: invalid shape for {} state input.".format(name)
            ))

        # internals: type and shape
        for name, spec in self.internals_spec.items():
            check_type(
                internals[name], spec['type'],
                "Agent.experience: invalid type for {} internal input.".format(name)
            )
            assertions.append(batched_shape_assertion(
                internals[name], spec['shape'],
                "Agent.experience: invalid shape for {} internal input.".format(name)
            ))

        # action_masks: type and shape, only for int actions
        for name, spec in self.actions_spec.items():
            if spec['type'] != 'int':
                continue
            mask_name = name + '_mask'
            check_type(
                auxiliaries[mask_name], 'bool',
                "Agent.experience: invalid type for {} action-mask input.".format(mask_name)
            )
            assertions.append(batched_shape_assertion(
                auxiliaries[mask_name], spec['shape'] + (spec['num_values'],),
                "Agent.experience: invalid shape for {} action-mask input.".format(
                    mask_name
                )
            ))
            # Every mask entry needs at least one valid action along the last axis
            action_rank = len(spec['shape'])
            any_valid = tf.reduce_any(
                input_tensor=auxiliaries[mask_name], axis=(action_rank + 1)
            )
            all_have_valid = tf.reduce_all(
                input_tensor=any_valid, axis=tuple(range(action_rank + 1))
            )
            assertions.append(tf.debugging.assert_equal(
                x=all_have_valid, y=bool_true,
                message="Agent.experience: at least one action has to be valid "
                        "for {} action-mask input.".format(mask_name)
            ))

        # actions: type and shape
        for name, spec in self.actions_spec.items():
            check_type(
                actions[name], spec['type'],
                "Agent.experience: invalid type for {} action input.".format(name)
            )
            assertions.append(batched_shape_assertion(
                actions[name], spec['shape'],
                "Agent.experience: invalid shape for {} action input.".format(name)
            ))

        # Set global tensors
        independent = tf.constant(value=False, dtype=util.tf_dtype(dtype='bool'))
        deterministic = tf.constant(value=True, dtype=util.tf_dtype(dtype='bool'))
        Module.update_tensors(
            independent=independent, deterministic=deterministic,
            timestep=self.global_timestep, episode=self.global_episode, update=self.global_update
        )

        with tf.control_dependencies(control_inputs=assertions):
            # Preprocessing states
            for name in self.states_spec:
                if name in self.preprocessing:
                    states[name] = self.preprocessing[name].apply(x=states[name])

            # Preprocessing reward
            if 'reward' in self.preprocessing:
                reward = self.preprocessing['reward'].apply(x=reward)

            # Core experience: retrieve experience operation
            experienced = self.core_experience(
                states=states, internals=internals, auxiliaries=auxiliaries, actions=actions,
                terminal=terminal, reward=reward
            )

        with tf.control_dependencies(control_inputs=(experienced,)):
            # Function-level identity operation for retrieval (plus enforce dependency)
            timestep = util.identity_operation(
                x=self.global_timestep, operation_name='timestep-output'
            )
            episode = util.identity_operation(
                x=self.global_episode, operation_name='episode-output'
            )
            update = util.identity_operation(
                x=self.global_update, operation_name='update-output'
            )

        return timestep, episode, update
 def undo_deltas():
     # Apply the negated deltas via fn_x, then hand back identities of x_final
     # which depend on that operation.
     negated = [-delta for delta in deltas]
     reverted = self.fn_x(negated)
     with tf.control_dependencies(control_inputs=(reverted, )):
         restored = list()
         for t in x_final:
             restored.append(util.identity_operation(x=t))
         return restored
Пример #18
0
    def create_api_function(self, name, api_function):
        """Builds the graph of an API function and returns a callable that runs it.

        The class-level build state on `Module` is initialized, `api_function` is called
        inside the device context (if `self.device` is set) and a name scope `name`, and the
        build state is reset afterwards. Entered contexts are exited and the build state is
        reset even if building raises.

        Args:
            name: Scoped API function name; must contain a '.', the part after the first '.'
                is used as key in `self.output_tensors` and `self.query_tensors`.
            api_function: Zero-argument callable which builds and returns the result tensors.

        Returns:
            Function `fn(query=None, **kwargs)` which feeds the non-None keyword arguments
            (dict arguments are flattened one level) to the '-input:0' tensors, fetches the
            results plus optional query tensors via `self.monitored_session.run`, and returns
            the fetched values.
        """
        # Call API TensorFlow function
        Module.global_scope = list()
        Module.scope_stack = list()
        Module.while_counter = 0
        Module.cond_counter = 0
        Module.global_tensors = OrderedDict()
        Module.queryable_tensors = OrderedDict()

        try:
            if self.device is not None:
                self.device.__enter__()
            try:
                scope = tf.name_scope(name=name)
                Module.scope_stack.append(scope)
                scope.__enter__()
                try:
                    results = api_function()
                    api_name = name[name.index('.') + 1:]
                    # Strip the leading "<name>/" and the trailing 9 characters
                    # (the length of '-output:0') from the tensor names
                    self.output_tensors[api_name] = sorted(
                        x.name[len(name) + 1: -9] for x in util.flatten(xs=results)
                    )

                    # Function-level identity operation for retrieval
                    query_tensors = set()
                    for scoped_name, tensor in Module.queryable_tensors.items():
                        util.identity_operation(
                            x=tensor, operation_name=(scoped_name + '-output')
                        )
                        assert scoped_name not in query_tensors
                        query_tensors.add(scoped_name)
                    self.query_tensors[api_name] = sorted(query_tensors)
                finally:
                    scope.__exit__(None, None, None)
                    Module.scope_stack.pop()
            finally:
                if self.device is not None:
                    self.device.__exit__(None, None, None)

            # Only reached if building succeeded: all scopes must have been left
            assert len(Module.global_scope) == 0
            assert len(Module.scope_stack) == 0
        finally:
            # Never leave stale build state behind, also not on failure
            Module.global_scope = None
            Module.scope_stack = None
            Module.while_counter = None
            Module.cond_counter = None
            Module.global_tensors = None
            Module.queryable_tensors = None

        def fn(query=None, **kwargs):
            # Feed_dict dictionary
            feed_dict = dict()
            for key, arg in kwargs.items():
                if arg is None:
                    continue
                elif isinstance(arg, dict):
                    # Support single nesting (for states, internals, actions)
                    for sub_key, sub_arg in arg.items():
                        feed_dict[util.join_scopes(self.name, sub_key) + '-input:0'] = sub_arg
                else:
                    feed_dict[util.join_scopes(self.name, key) + '-input:0'] = arg
            if not all(isinstance(x, str) and x.endswith('-input:0') for x in feed_dict):
                # Report the API name rather than the function object
                raise TensorforceError.value(
                    name=name, argument='inputs', value=list(feed_dict)
                )

            # Fetches value/tuple
            fetches = util.fmap(function=(lambda x: x.name), xs=results)
            if query is not None:
                # If additional tensors are to be fetched
                query = util.fmap(
                    function=(lambda x: util.join_scopes(name, x) + '-output:0'), xs=query
                )
                if util.is_iterable(x=fetches):
                    fetches = tuple(fetches) + (query,)
                else:
                    fetches = (fetches, query)
            if not util.reduce_all(
                predicate=(lambda x: isinstance(x, str) and x.endswith('-output:0')), xs=fetches
            ):
                raise TensorforceError.value(
                    name=name, argument='outputs', value=list(fetches)
                )

            # TensorFlow session call
            fetched = self.monitored_session.run(fetches=fetches, feed_dict=feed_dict)

            return fetched

        return fn
Пример #19
0
    def create_api_function(self, name, api_function):
        """Builds the graph of an API function and returns a callable that runs it.

        `api_function` is called inside the device context (if `self.device` is set) and a
        name scope `name`; afterwards '-output' identity operations are created for all
        `Module.global_tensors` whose scoped name contains neither '/cond/' nor '/while/'.
        The device context is exited and the `Module` build state is reset even if building
        raises.

        Args:
            name: Scoped API function name, used as name scope and as prefix for queries.
            api_function: Zero-argument callable which builds and returns the result tensors.

        Returns:
            Function `fn(query=None, **kwargs)` which feeds the non-None keyword arguments
            (dict arguments are flattened one level) to the '-input:0' tensors, fetches the
            results plus optional query tensors via `self.monitored_session.run`, and returns
            the fetched values.
        """
        # Call API TensorFlow function
        Module.global_scope = list()
        Module.global_tensors = OrderedDict()
        try:
            if self.device is not None:
                self.device.__enter__()
            try:
                with tf.name_scope(name=name):
                    results = api_function()

                    # Function-level identity operation for retrieval
                    for scoped_name, tensor in Module.global_tensors.items():
                        if '/cond/' not in scoped_name and '/while/' not in scoped_name:
                            util.identity_operation(x=tensor,
                                                    operation_name=(scoped_name +
                                                                    '-output'))
            finally:
                if self.device is not None:
                    self.device.__exit__(None, None, None)
        finally:
            # Never leave stale build state behind, also not on failure
            Module.global_tensors = None
            Module.global_scope = None

        def fn(query=None, **kwargs):
            # Feed_dict dictionary
            feed_dict = dict()
            for key, arg in kwargs.items():
                if arg is None:
                    continue
                elif isinstance(arg, dict):
                    # Support single nesting (for states, internals, actions)
                    for sub_key, sub_arg in arg.items():
                        feed_dict[util.join_scopes(self.name, sub_key) +
                                  '-input:0'] = sub_arg
                else:
                    feed_dict[util.join_scopes(self.name, key) +
                              '-input:0'] = arg
            if not all(
                    isinstance(x, str) and x.endswith('-input:0')
                    for x in feed_dict):
                raise TensorforceError.unexpected()

            # Fetches value/tuple
            fetches = util.fmap(function=(lambda x: x.name), xs=results)
            if query is not None:
                # If additional tensors are to be fetched
                query = util.fmap(function=(
                    lambda x: util.join_scopes(name, x) + '-output:0'),
                                  xs=query)
                if util.is_iterable(x=fetches):
                    fetches = tuple(fetches) + (query, )
                else:
                    fetches = (fetches, query)
            if not util.reduce_all(predicate=(
                    lambda x: isinstance(x, str) and x.endswith('-output:0')),
                                   xs=fetches):
                raise TensorforceError.unexpected()

            # TensorFlow session call
            fetched = self.monitored_session.run(fetches=fetches,
                                                 feed_dict=feed_dict)

            return fetched

        return fn