Example #1
    def _find_best_span(args):
        """Compute the best span."""
        current_start_scores, current_end_scores = args

        # [seq_len], [seq_len]
        start_max, start_backpointers, _, _ = tf.scan(
            fn=_cumulative_max,
            elems=current_start_scores,
            initializer=(float("-inf"), -1, 0, 1),
            back_prop=False,
            reverse=False)
        end_max, end_backpointers, _, _ = tf.scan(fn=_cumulative_max,
                                                  elems=current_end_scores,
                                                  initializer=(float("-inf"),
                                                               -1, seq_len - 1,
                                                               -1),
                                                  back_prop=False,
                                                  reverse=True)

        # []
        total_max = start_max + end_max
        best_index = tf.argmax(total_max, -1)
        best_start = start_backpointers[best_index]
        best_end = end_backpointers[best_index]
        best_score = total_max[best_index]

        return best_start, best_end, best_score
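The step function `_cumulative_max` (and `seq_len`) is defined outside this snippet. A minimal sketch of what such a step function might look like, purely hypothetical and only to illustrate the (best_score, best_index, position, step) state threaded through tf.scan, where `step` is +1 for the forward scan and -1 for the reverse one:

    def _cumulative_max(state, score):
        # Hypothetical sketch: keep a running maximum and the position at which
        # it occurred; `position + step` advances the cursor for the next step.
        best_score, best_index, position, step = state
        new_best_index = tf.where(score > best_score, position, best_index)
        new_best_score = tf.maximum(best_score, score)
        return new_best_score, new_best_index, position + step, step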
Example #2
def reverse_part(inputs, hparams, n_bits):
    """Reverse part of Benes block.

  Repeatedly applies an interleaved Residual Switch layer and Reverse Shuffle
  layer. One set of weights is used for all Switch layers.

  Args:
    inputs: inputs for reverse part. Should be outputs from forward part.
    hparams: params of the network.
    n_bits: count of repeated layer applications.

  Returns:
    tf.Tensor: output of reverse part.
  """
    reverse_rsu = RSU("reverse_switch", hparams.dropout, hparams.mode)

    def reverse_step(state, _):
        with tf.variable_scope("reverse"):
            new_state = reverse_rsu(state)
            return reverse_shuffle_layer(new_state)

    reverse_outputs = tf.scan(reverse_step,
                              tf.range(n_bits, n_bits * 2),
                              initializer=inputs,
                              parallel_iterations=1,
                              swap_memory=True)

    return reverse_outputs[-1, :, :, :]
Example #3
    def forward_pass_states(self, processed_input, initial_hidden):
        all_hidden_states = tf.scan(self.forward_pass_gru,
                                    processed_input,
                                    initializer=initial_hidden,
                                    name='states')
        return all_hidden_states
Example #4
    def lstm(self, inps):
        weight, bias = self.recurrent_weight, self.recurrent_bias

        init_state = tf.zeros(shape=[2, inps.shape[0], self.hidden_size],
                              dtype=tf.float32)

        def step(hprev, x):
            st_1, ct_1 = tf.unstack(hprev)
            rows, columns, values, row_indices, row_offsets, column_indices = self.dynamic_gate(
                x)

            fc_gate = kernels.spmm(rows, columns, values, row_indices,
                                   row_offsets, column_indices,
                                   tf.transpose(tf.concat([x, st_1],
                                                          -1)), False, False)

            fc_gate = tf.transpose(fc_gate) + bias

            i, f, g, o = tf.split(fc_gate, 4, axis=1)
            i, f, g, o = tf.sigmoid(i), tf.sigmoid(f), tf.tanh(g), tf.sigmoid(
                o)
            ct = ct_1 * f + g * i
            st = tf.tanh(ct) * o

            return tf.stack([st, ct])

        states = tf.scan(step,
                         tf.transpose(inps, [1, 0, 2]),
                         initializer=init_state)

        return tf.transpose(states, [1, 2, 0, 3])[0]
Example #5
def diagonal_neural_gpu(inputs, hparams, name=None):
    """Improved Neural GPU as in https://arxiv.org/abs/1702.08727."""
    with tf.variable_scope(name, "diagonal_neural_gpu"):

        def step(state_tup, inp):
            """Single step of the improved Neural GPU."""
            state, _ = state_tup
            x = state
            for layer in range(hparams.num_hidden_layers):
                x, new_loss = common_layers.diagonal_conv_gru(
                    x, (hparams.kernel_height, hparams.kernel_width),
                    hparams.hidden_size,
                    dropout=hparams.dropout,
                    name="dcgru_%d" % layer)
            # Padding input is zeroed out in the modality; we check this by summing.
            padding_inp = tf.less(tf.reduce_sum(tf.abs(inp), axis=[1, 2]),
                                  0.00001)
            new_state = tf.where(padding_inp, state,
                                 x)  # No-op where inp is padding.
            return new_state, new_loss

        final_state, losses = tf.scan(step,
                                      tf.transpose(inputs, [1, 0, 2, 3]),
                                      initializer=(inputs, tf.constant(0.0)),
                                      parallel_iterations=1,
                                      swap_memory=True)
        return final_state[0, :, :, :, :], 2.0 * tf.reduce_mean(losses)
Example #6
def forward_part(block_out, hparams, n_bits):
    """Forward part of Benes block.

  Repeatedly applies an interleaved Residual Switch layer and Shuffle
  layer. One set of weights is used for all Switch layers.

  Args:
    block_out: TODO(authors) document.
    hparams: params of the network.
    n_bits: count of repeated layer applications.

  Returns:
    tf.Tensor: output of forward part.
  """
    forward_rsu = RSU("switch", hparams.dropout, hparams.mode)

    def forward_step(state, _):
        with tf.variable_scope("forward"):
            new_state = forward_rsu(state)
            return shuffle_layer(new_state)

    forward_outputs = tf.scan(forward_step,
                              tf.range(0, n_bits),
                              initializer=block_out,
                              parallel_iterations=1,
                              swap_memory=True)

    return forward_outputs[-1, :, :, :]
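`RSU`, `shuffle_layer` and `reverse_shuffle_layer` come from the surrounding Shuffle-Exchange code and are not shown in these snippets. As a purely hypothetical illustration of the kind of permutation such a layer applies, a perfect shuffle over the length axis (position indices of n_bits bits rotated left by one bit; the reverse layer would rotate right) could be sketched as:

def shuffle_layer_sketch(x, n_bits):
    # Hypothetical sketch, not the library's shuffle_layer: element i of the
    # output is taken from the position whose n_bits-bit index is i rotated
    # left by one bit.
    length = 2**n_bits
    idx = tf.range(length)
    src = (idx * 2) % length + (idx * 2) // length
    return tf.gather(x, src, axis=1)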
Example #7
    def truncated_rtrl(self, dL_dy, **kwargs):
        """The origin LSTM_97 real-time recurrent training algorithm
    :param dL_dy: dL/dy with shape (num_steps(=1), batch_size, *y.shape)
    :return: (grads_and_vars, dS/dW)
    """
        # Step 0: Split new_c outside the scan loop
        S = self._new_state_tensor[1]
        state_size = S.shape[1]
        Ss = tf.split(S, num_or_size_splits=state_size, axis=1)

        # Step 1: Update dS/dW = (dS/dW1, ..., dS/dWm)
        mascot = tf.placeholder(tf.float32)
        dS_dW = []
        for dS_dWj_tau, Wj in zip(self.gradient_buffer_placeholder,
                                  self.custom_var_list):
            dS_dWj = []
            split_dS_dWj_tau = tf.split(dS_dWj_tau,
                                        num_or_size_splits=state_size,
                                        axis=-1)
            for dSi_dWj_tau, Si in zip(split_dS_dWj_tau, Ss):
                dSi_dWj = []
                for b in range(hub.batch_size):
                    grad = tf.gradients(Si[b], Wj)[0]
                    dSi_dWj_n = dSi_dWj_tau[b] + tf.expand_dims(grad, -1)
                    dSi_dWj.append(tf.expand_dims(dSi_dWj_n, 0))
                # Concatenate along batches
                dSi_dWj = tf.concat(dSi_dWj, axis=0)  # (B, *W, 1)
                dS_dWj.append(dSi_dWj)
            dS_dWj = tf.concat(dS_dWj, axis=-1)  # (B, *W, S)
            dS_dW.append(dS_dWj)
        dS_dW = tuple(dS_dW)

        # Step 2: Compute dL/dW as dL/dy * dy/dS * dS/dW
        #           = \sum_{n over batches} \sum_{k over states} ...
        #               dL/dy * dy/dS * dS/dW
        #    (1) dL_dy.shape = (?(=1), B, D) in dL_dy
        #    (2) dy_dS = (dy1/dS, ..., dyn/dS) in self._grad_tensors
        #TODO
        dL_dy = tf.reshape(dL_dy, shape=(-1, 1, dL_dy.shape[2]))  # (B, 1, D)
        dy_dS = self._grad_tensors  # (B, D, S)

        def calc_dL_dW(_, mass):
            dldy, dyds, dsdw = mass
            # Calculate dL/dS
            dlds = tf.matmul(dldy, dyds)  # (1, S)
            # Calculate dL/dW
            dL_dW = []
            for dsdwj in dsdw:  # (*wj.shape, S)
                dL_dW.append(tf.reduce_sum(tf.multiply(dsdwj, dlds), axis=-1))
            return tuple(dL_dW)

        dL_dS_batch = tf.scan(calc_dL_dW, (dL_dy, dy_dS, dS_dW),
                              initializer=(mascot, ) *
                              len(self.custom_var_list))
        dL_dS = [tf.reduce_sum(t, axis=0) for t in dL_dS_batch]

        # Step 3: Return (((dW1, W1), ..., (dWn, Wn)), dS/dW)
        grads_and_vars = [(g, v) for g, v in zip(dL_dS, self.custom_var_list)]
        return tuple(grads_and_vars), dS_dW
Example #8
    def forward_pass_states(self, processed_input, initial_hidden):
        all_hidden_states = tf.scan(self.forward_pass_lstm,
                                    processed_input,
                                    initializer=initial_hidden,
                                    name='states')
        all_hidden_states = all_hidden_states[:, 0, :, :]
        return all_hidden_states
Example #9
def get_first_occurrence_indices(reference, symbol, optimize_for_tpu=False):
    """For each row in reference, get index after the first occurrence of symbol.

  If symbol is not present on a row, return reference.shape[1] instead.

  Args:
    reference: [B, T] tensor of elements of the same type as symbol.
    symbol: int or [] scalar tensor of the same dtype as reference.
    optimize_for_tpu: bool, whether to use a TPU-capable variant.

  Returns:
    A [B] tensor of tf.int32 where x[i] is such that
    reference[i, x[i]-1] == symbol, and reference[i, j] != symbol
    for j < x[i]-1. If symbol is not present on row i then x[i] = T.
  """
    if optimize_for_tpu:
        # Run code which can be compiled on TPU.
        # Transpose reference to [T, B]
        reference = tf.transpose(reference, [1, 0])
        range_tensor = tf.range(reference.shape.as_list()[0])
        indexes = tf.stack([range_tensor] * reference.shape.as_list()[1], 1)
        symbol = tf.stack([symbol] * reference.shape.as_list()[1], 0)

        initial_indices = tf.constant(reference.shape.as_list()[0],
                                      shape=[reference.shape.as_list()[1]],
                                      dtype=tf.int32)

        # We want a function which moves backwards.
        def fn(current_index, elems):
            ref, ind = elems
            return tf.where(tf.equal(ref, symbol), ind + 1, current_index)

        min_indexes = tf.scan(fn, (reference, indexes),
                              initializer=initial_indices,
                              parallel_iterations=1,
                              reverse=True)
        return min_indexes[0]

    batch_size, max_length = reference.get_shape().as_list()
    symbol = tf.convert_to_tensor(symbol)
    symbol.shape.assert_is_compatible_with([])
    # Add symbol at the end of each row, to make sure tf.where works.
    tensor = tf.concat(
        [reference, tf.tile(symbol[None, None], [batch_size, 1])], axis=1)
    index_all_occurrences = tf.where(tf.equal(tensor, symbol))
    index_all_occurrences = tf.cast(index_all_occurrences, tf.int32)
    # `index_all_occurrences` is a [N, 2] tensor with coordinates of all positions
    # of `symbol` in `tensor`. So N will be >= batch size since there can be
    # several `symbol` in one row of tensor. We need to take only the position
    # of the first occurrence for each row. `segment_min` does that, taking the
    # lowest column index for each row index.
    index_first_occurrences = tf.segment_min(index_all_occurrences[:, 1],
                                             index_all_occurrences[:, 0])
    index_first_occurrences.set_shape([batch_size])
    index_first_occurrences = tf.minimum(index_first_occurrences + 1,
                                         max_length)
    return index_first_occurrences
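A toy usage sketch (illustration only; the function above is used unchanged and run here through the TF1-style API via tf.compat.v1). For reference = [[5, 3, 7], [1, 1, 1]] and symbol = 3, the first row yields 2 (the index just after the first 3) and the second row yields 3 (= T, since 3 never occurs):

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

reference = tf.constant([[5, 3, 7],
                         [1, 1, 1]], dtype=tf.int32)
indices = get_first_occurrence_indices(reference, 3)
with tf.Session() as sess:
    print(sess.run(indices))  # [2 3]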
Example #10
def discounted_return(reward, length, discount):
    """Discounted Monte-Carlo returns."""
    timestep = tf.range(reward.shape[1].value)
    mask = tf.cast(timestep[None, :] < length[:, None], tf.float32)
    return_ = tf.reverse(
        tf.transpose(
            tf.scan(lambda agg, cur: cur + discount * agg,
                    tf.transpose(tf.reverse(mask * reward, [1]), [1, 0]),
                    tf.zeros_like(reward[:, -1]), 1, False), [1, 0]), [1])
    return tf.check_numerics(tf.stop_gradient(return_), 'return')
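A toy usage sketch (illustration only; assumes the TF1-style API, shown here via tf.compat.v1 with v2 behavior disabled). With reward = [[1., 1., 1.]], length = [3] and discount = 0.9, the reverse scan applies agg <- r + 0.9 * agg from the right, giving [[2.71, 1.9, 1.0]]:

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

reward = tf.constant([[1., 1., 1.]])
length = tf.constant([3])
return_ = discounted_return(reward, length, 0.9)
with tf.Session() as sess:
    print(sess.run(return_))  # [[2.71 1.9  1.  ]]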
Example #11
        def gamma_scales_log_prob_fn(params):
            assert num_classes == 2

            def unmarshal(params):
                results = []
                n_dimensions_used = 0
                if regression_use_beta_scales:
                    dim_list = [num_features, num_features, 1]
                else:
                    dim_list = [num_features, 1]
                for n_to_add in dim_list:
                    results.append(
                        params[Ellipsis,
                               n_dimensions_used:n_dimensions_used + n_to_add])
                    n_dimensions_used += n_to_add
                return tuple(results)

            log_prob = 0.
            if regression_use_beta_scales:
                beta, beta_log_scales, overall_log_scale = unmarshal(params)
                # p(per-variable scales)
                log_prob += tf.reduce_sum(
                    tfd.TransformedDistribution(
                        tfd.Gamma(0.5, 0.5),
                        tfb.Invert(tfb.Exp())).log_prob(beta_log_scales), -1)
            else:
                beta, overall_log_scale = unmarshal(params)
                beta_log_scales = 0.0
            # p(overall scale)
            log_prob += tf.reduce_sum(
                tfd.Normal(0., 10.).log_prob(overall_log_scale), -1)
            # p(beta)
            log_prob += tf.reduce_sum(tfd.Normal(0., 1.).log_prob(beta), -1)
            # p(y | x, beta)
            scaled_beta = beta * tf.exp(overall_log_scale) * tf.exp(
                beta_log_scales)
            if batch_size:

                def body(_, i):
                    logits = tf.einsum("nd,md->mn", x[i:i + batch_size],
                                       scaled_beta)
                    return tf.reduce_sum(
                        tfd.Bernoulli(logits=logits).log_prob(
                            y[i:i + batch_size]), -1)

                log_prob += tf.reduce_sum(
                    tf.scan(body,
                            tf.range(0, x.shape[0], batch_size),
                            initializer=tf.zeros(tf.shape(params)[:1]),
                            parallel_iterations=1), 0)
            else:
                logits = tf.einsum("nd,md->mn", x, scaled_beta)
                log_prob += tf.reduce_sum(
                    tfd.Bernoulli(logits=logits).log_prob(y), -1)
            return log_prob
Example #12
def discounted_rewards(reward, done, gae_gamma, end_values):
    """Discounted rewards."""
    not_done = tf.expand_dims(1 - tf.cast(done, tf.float32), axis=2)
    end_values = end_values * not_done[-1, :, :]
    return_ = tf.scan(lambda agg, cur: cur + gae_gamma * agg,
                      tf.expand_dims(reward, axis=2) * not_done,
                      initializer=end_values,
                      reverse=True,
                      back_prop=False,
                      parallel_iterations=2)
    return tf.check_numerics(return_, "return")
Example #13
def lambda_advantage(reward, value, length, discount):
    """Generalized Advantage Estimation."""
    timestep = tf.range(reward.shape[1].value)
    mask = tf.cast(timestep[None, :] < length[:, None], tf.float32)
    next_value = tf.concat([value[:, 1:], tf.zeros_like(value[:, -1:])], 1)
    delta = reward + discount * next_value - value
    advantage = tf.reverse(
        tf.transpose(
            tf.scan(lambda agg, cur: cur + discount * agg,
                    tf.transpose(tf.reverse(mask * delta, [1]), [1, 0]),
                    tf.zeros_like(delta[:, -1]), 1, False), [1, 0]), [1])
    return tf.check_numerics(tf.stop_gradient(advantage), 'advantage')
Example #14
def lambda_return(reward, value, length, discount, lambda_):
    """TD-lambda returns."""
    timestep = tf.range(reward.shape[1].value)
    mask = tf.cast(timestep[None, :] < length[:, None], tf.float32)
    sequence = mask * reward + discount * value * (1 - lambda_)
    discount = mask * discount * lambda_
    sequence = tf.stack([sequence, discount], 2)
    return_ = tf.reverse(
        tf.transpose(
            tf.scan(lambda agg, cur: cur[0] + cur[1] * agg,
                    tf.transpose(tf.reverse(sequence, [1]), [1, 2, 0]),
                    tf.zeros_like(value[:, -1]), 1, False), [1, 0]), [1])
    return tf.check_numerics(tf.stop_gradient(return_), 'return')
Example #15
    def _build(self, **kwargs):
        # self.init_state should be called for the first time inside this method
        #  so that it can be initialized within the appropriate graph

        # :: Define output
        # Make sure input has been defined
        if self.input_ is None: raise ValueError('!! input not found')
        assert isinstance(self.input_, Input)
        # Input placeholder has a shape of [batch_size, num_steps, *sample_shape]
        self.input_.set_group_shape((None, None))

        # Transpose input so as to fit the input of tf.scan
        input_placeholder = self.input_()

        # Build a shadow in order to know the nested structure of `initializer` in advance
        initializer = self._build_while_free()

        # Get elems to feed tf.scan
        elems = transpose_tensor(input_placeholder, [1, 0])
        if self.loss_in_loop:
            from tframe.models import Predictor
            assert isinstance(self, Predictor)
            targets_placeholder = self._targets.tensor
            elems = (elems, transpose_tensor(targets_placeholder, [1, 0]))

        # Send stuff into tf.scan and get results
        results = tf.scan(self, elems, initializer=initializer, name='Scan')
        scan_outputs, state_sequences = self._unwrap_outputs(results)

        # Activate state slot
        assert isinstance(self._state_slot, NestedTensorSlot)

        # Get last state and distribute to all recurrent-child
        last_state = Recurrent._extract_tensors(state_sequences,
                                                lambda t: t[-1])
        self._new_state_tensor = last_state
        self._distribute_last_tensors()

        # Plug last state to corresponding slot
        self._state_slot.plug(last_state)
        self._update_group.add(self._state_slot)

        # TODO: BETA
        if hub.use_rtrl: self._update_group.add(self.grad_buffer_slot)
        if hub.test_grad: self._update_group.add(self.grad_delta_slot)

        # Transpose scan outputs to get final outputs
        outputs = transpose_tensor(scan_outputs, [1, 0])

        # Output has a shape of [batch_size, num_steps, *output_shape]
        self.outputs.plug(outputs)
Example #16
def ComputeChainStats(chain, target_mean, num_leapfrog_steps):
    # Chain is [num_steps, batch, num_dims]
    num_steps = tf.shape(chain)[0]
    counts = tf.to_float(tf.range(1, num_steps + 1))
    chain_mean = tf.cumsum(chain, 0) / counts[:, tf.newaxis, tf.newaxis]

    bias = target_mean - tf.reduce_mean(chain_mean, 1)
    variance = tf.reduce_mean(
        tf.square(chain_mean - tf.reduce_mean(chain_mean, 1, keep_dims=True)),
        1)
    inst_bias = target_mean - tf.reduce_mean(chain, 1)
    inst_variance = tf.reduce_mean(tf.square(target_mean - chain), 1)

    def reducer(_, idx):
        chain_mean = tf.reduce_mean(chain[idx // 2:idx], 0)
        bias = tf.reduce_mean(target_mean - chain_mean, 0)
        variance = tf.reduce_mean(
            tf.square(chain_mean - tf.reduce_mean(chain_mean, 0)), 0)
        return bias, variance

    indices = 1 + tf.range(num_steps)
    warmupped_bias, warmupped_variance = tf.scan(reducer,
                                                 indices,
                                                 initializer=(chain[0, 0],
                                                              chain[0, 0]))

    half_steps = num_steps // 2
    half_chain = chain[half_steps:]

    error_sq = tf.reduce_mean(
        tf.square(tf.reduce_mean(half_chain, 0) - target_mean), 0)

    ess = utils.EffectiveSampleSize(half_chain) / tf.to_float(half_steps)
    ess_per_grad = ess / tf.to_float(num_leapfrog_steps)
    rhat = tfp.mcmc.potential_scale_reduction(half_chain)
    autocorr = tf.reduce_mean(
        utils.SanitizedAutoCorrelation(half_chain, 0, max_lags=300), 1)

    return ChainStats(bias=bias,
                      variance=variance,
                      error_sq=error_sq,
                      inst_bias=inst_bias,
                      inst_variance=inst_variance,
                      ess=ess,
                      ess_per_grad=ess_per_grad,
                      rhat=rhat,
                      warmupped_bias=warmupped_bias,
                      warmupped_variance=warmupped_variance,
                      autocorr=autocorr)
Example #17
    def _perform_update_steps(self, observ, action, old_mean, old_logstd,
                              reward, length):
        """Perform multiple update steps of value function and policy.

    The advantage is computed once at the beginning and shared across
    iterations. We need to pick the summary of a single iteration to report,
    and thus choose the one after half of the iterations.

    Args:
      observ: Sequences of observations.
      action: Sequences of actions.
      old_mean: Sequences of action means of the behavioral policy.
      old_logstd: Sequences of action log stddevs of the behavioral policy.
      reward: Sequences of rewards.
      length: Batch of sequence lengths.

    Returns:
      Summary tensor.
    """
        # tf.Print('observ, action, old_mean, old_logstd, reward, length:',observ, action, old_mean, old_logstd, reward, length)
        return_ = utility.discounted_return(reward, length,
                                            self._config.discount)
        value = self._network(observ, length).value
        if self._config.gae_lambda:
            advantage = utility.lambda_return(reward, value, length,
                                              self._config.discount,
                                              self._config.gae_lambda)
        else:
            advantage = return_ - value
        mean, variance = tf.nn.moments(advantage, axes=[0, 1], keep_dims=True)
        advantage = (advantage - mean) / (tf.sqrt(variance) + 1e-8)
        advantage = tf.Print(advantage,
                             [tf.reduce_mean(return_),
                              tf.reduce_mean(value)], 'return and value: ')
        advantage = tf.Print(advantage, [tf.reduce_mean(advantage)],
                             'normalized advantage: ')
        # pylint: disable=g-long-lambda
        value_loss, policy_loss, summary = tf.scan(
            lambda _1, _2: self._update_step(
                observ, action, old_mean, old_logstd, reward, advantage, length
            ),
            tf.range(self._config.update_epochs), [0., 0., ''],
            parallel_iterations=1)
        print_losses = tf.group(
            tf.Print(0, [tf.reduce_mean(value_loss)], 'value loss: '),
            tf.Print(0, [tf.reduce_mean(policy_loss)], 'policy loss: '))
        with tf.control_dependencies([value_loss, policy_loss, print_losses]):
            return summary[self._config.update_epochs // 2]
Example #18
        def horseshoe_log_prob_fn(params):
            assert num_classes == 2

            (z, r1_local, r2_local, r1_global, r2_global) = tf.split(
                params, [num_features, num_features, num_features, 1, 1],
                axis=-1)

            def indep(d):
                return tfd.Independent(d, 1)

            zero = tf.zeros(num_features)
            one = tf.ones(num_features)
            half = 0.5 * one

            p_z = indep(tfd.Normal(zero, one))
            p_r1_local = indep(tfd.HalfNormal(one))
            p_r2_local = indep(tfd.InverseGamma(half, half))

            p_r1_global = indep(tfd.HalfNormal([1.]))
            p_r2_global = indep(tfd.InverseGamma([0.5], [0.5]))

            log_prob = (p_z.log_prob(z) + p_r1_local.log_prob(r1_local) +
                        p_r2_local.log_prob(r2_local) +
                        p_r1_global.log_prob(r1_global) +
                        p_r2_global.log_prob(r2_global))

            lambda_ = r1_local * tf.sqrt(r2_local)
            tau = r1_global * tf.sqrt(r2_global)
            beta = z * lambda_ * tau

            if batch_size:

                def body(_, i):
                    logits = tf.einsum("nd,md->mn", x[i:i + batch_size], beta)
                    return tfd.Independent(tfd.Bernoulli(logits=logits),
                                           1).log_prob(y[i:i + batch_size])

                log_prob += tf.reduce_sum(
                    tf.scan(body,
                            tf.range(0, x.shape[0], batch_size),
                            initializer=tf.zeros(tf.shape(params)[:1]),
                            parallel_iterations=1), 0)
            else:
                logits = tf.einsum("nd,md->mn", x, beta)
                log_prob += tfd.Independent(tfd.Bernoulli(logits=logits),
                                            1).log_prob(y)
            return log_prob
Example #19
    def integrate(self, func, y0, t):
        time_delta_grid = t[1:] - t[:-1]

        def scan_func(y, t_dt):
            # recall the necessary variables
            n_ = self.n_
            F_b = self.F_b

            t, dt = t_dt

            # Differential update
            dy = self._step_func(func, t, dt, y)  # Make code more modular.
            dy = tf.cast(dy, dtype=y.dtype)  # Failsafe

            out = y + dy  # the result after the differential update

            # Conditional to use specialized Integrator vs Normal Integrator (n=0)
            if n_ > 0:

                # Extract the last n variables for fire times
                fire_t = y[-n_:]

                # Value of change in firing times if neuron didn't fire = 0
                l = tf.zeros(tf.shape(fire_t), dtype=fire_t.dtype)

                # Value of change in firing times if neuron fired = Current Time - Last Fire Time
                l_ = t - fire_t

                # Check if Voltage is initially less than Firing Threshold
                z = tf.less(y[:n_], F_b)
                # Check if Voltage is more than Firing Threshold after the update
                z_ = tf.greater_equal(out[:n_], F_b)

                # tf.where(cond,a,b) chooses elements from a/b based on condition
                df = tf.where(tf.logical_and(z, z_), l_, l)

                fire_t_ = fire_t + df  # Update firing time

                return tf.concat([out[:-n_], fire_t_], 0)
            else:
                return out

        y = tf.scan(scan_func, (t[:-1], time_delta_grid), y0)

        return tf.concat([[y0], y], axis=0)
Example #20
    def get_ht_ctx(self, emb_y, target_hidden_state_0, annotations, a_m, y_m):

        res = tf.scan(
            self.one_time_step,
            elems=(emb_y, y_m),
            initializer=(target_hidden_state_0,
                         tf.zeros([tf.shape(annotations)[0],
                                   self.context_dim]),
                         tf.zeros([
                             tf.shape(annotations)[0],
                             tf.shape(annotations)[1],
                             tf.shape(annotations)[2]
                         ]),
                         tf.zeros([
                             tf.shape(annotations)[0],
                             tf.shape(annotations)[1],
                             tf.shape(annotations)[2]
                         ]), annotations, a_m))

        return res
Example #21
        def gamma_scales2_log_prob_fn(params):
            assert num_classes == 2

            (z, local_scale,
             global_scale) = tf.split(params, [num_features, num_features, 1],
                                      axis=-1)

            def indep(d):
                return tfd.Independent(d, 1)

            zero = tf.zeros(num_features)
            one = tf.ones(num_features)
            half = 0.5 * one

            p_z = indep(tfd.Normal(zero, one))
            p_local_scale = indep(tfd.Gamma(half, half))
            p_global_scale = indep(tfd.Gamma([0.5], [0.5]))

            log_prob = (p_z.log_prob(z) + p_local_scale.log_prob(local_scale) +
                        p_global_scale.log_prob(global_scale))

            beta = z * local_scale * global_scale

            if batch_size:

                def body(_, i):
                    logits = tf.einsum("nd,md->mn", x[i:i + batch_size], beta)
                    return tfd.Independent(tfd.Bernoulli(logits=logits),
                                           1).log_prob(y[i:i + batch_size])

                log_prob += tf.reduce_sum(
                    tf.scan(body,
                            tf.range(0, x.shape[0], batch_size),
                            initializer=tf.zeros(tf.shape(params)[:1]),
                            parallel_iterations=1), 0)
            else:
                logits = tf.einsum("nd,md->mn", x, beta)
                log_prob += tfd.Independent(tfd.Bernoulli(logits=logits),
                                            1).log_prob(y)
            return log_prob
Example #22
def calculate_generalized_advantage_estimator(
    reward, value, done, gae_gamma, gae_lambda):
  # pylint: disable=g-doc-args
  """Generalized advantage estimator.

  Returns:
    GAE estimator. It will be one element shorter than the input; this is
    because to compute GAE for [0, ..., N-1] one needs V for [1, ..., N].
  """
  # pylint: enable=g-doc-args

  next_value = value[1:, :]
  next_not_done = 1 - tf.cast(done[1:, :], tf.float32)
  delta = (reward[:-1, :] + gae_gamma * next_value * next_not_done
           - value[:-1, :])

  return_ = tf.reverse(tf.scan(
      lambda agg, cur: cur[0] + cur[1] * gae_gamma * gae_lambda * agg,
      [tf.reverse(delta, [0]), tf.reverse(next_not_done, [0])],
      tf.zeros_like(delta[0, :]),
      parallel_iterations=1), [0])
  return tf.check_numerics(return_, "return")
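A toy shape-check sketch (illustration only; assumes the same TF1-style API via tf.compat.v1). With T = 3 and B = 1 the result has shape [2, 1], one element shorter than the input, as the docstring notes:

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

reward = tf.constant([[1.], [1.], [1.]])    # [T, B]
value = tf.constant([[0.5], [0.5], [0.5]])  # [T, B]
done = tf.constant([[False], [False], [False]])
gae = calculate_generalized_advantage_estimator(reward, value, done, 0.99, 0.95)
with tf.Session() as sess:
    print(sess.run(gae).shape)  # (2, 1)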
Example #23
    def _update_value(self, observ, reward, length):
        """Perform multiple update steps of the value baseline.

    We need to pick the summary of a single iteration to report, and thus
    choose the one after half of the iterations.

    Args:
      observ: Sequences of observations.
      reward: Sequences of reward.
      length: Batch of sequence lengths.

    Returns:
      Summary tensor.
    """
        with tf.name_scope('update_value'):
            loss, summary = tf.scan(
                lambda _1, _2: self._update_value_step(observ, reward, length),
                tf.range(self._config.update_epochs_value), [0., ''],
                parallel_iterations=1)
            print_loss = tf.Print(0, [tf.reduce_mean(loss)], 'value loss: ')
            with tf.control_dependencies([loss, print_loss]):
                return summary[self._config.update_epochs_value // 2]
Example #24
def hmc(y, X, epsilon, L, start_q, alpha, n):
    config = tf.ConfigProto()
    if args.mode == 'cpu':
        config = tf.ConfigProto(device_count={'GPU': 0},
                                allow_soft_placement=True)
    with tf.Session(config=config) as sess:
        ty = tf.Variable(initial_value=y)
        tX = tf.Variable(initial_value=X)
        tU = lambda beta: U(ty, tX, beta, alpha)
        #tgrad_U = lambda beta: tf.gradients(ys=U(ty, tX, beta, alpha), xs=beta)[0]
        tgrad_U = lambda beta: grad_U(ty, tX, beta, alpha)
        z = tf.Variable(initial_value=np.zeros(n, dtype='float32'))

        cur_q = tf.Variable(initial_value=start_q)
        sess.run(tf.global_variables_initializer())

        def update(current_q, _):
            q = tf.identity(current_q)
            p = tf.random.normal(current_q.get_shape())
            current_p = tf.identity(p)
            p = p - 0.5 * epsilon * tgrad_U(q)
            for i in range(L):
                # position step
                q = q + epsilon * p
                # momentum step
                if i < L - 1:
                    p = p - epsilon * tgrad_U(q)
            # negate for symmetry
            p = -(p - 0.5 * epsilon * tgrad_U(q))
            current_U = tU(current_q)
            current_K = 0.5 * (tf.transpose(current_p) @ current_p)
            proposed_U = tU(q)
            proposed_K = 0.5 * (tf.transpose(p) @ p)
            ratio = (current_U - proposed_U + current_K - proposed_K)[0][0]
            return tf.cond(tf.less(tf.log(tf.random.uniform(())), ratio),
                           lambda: q, lambda: current_q)

        sc = tf.squeeze(tf.scan(update, z, initializer=cur_q))
        return sess.run(sc)
Example #25
    def lstm(self, inps):
        weight, bias = self.recurrent_weight, self.recurrent_bias
        init_state = tf.zeros(shape=[2, inps.shape[0], self.hidden_size],
                              dtype=tf.float32)

        def step(hprev, x):
            st_1, ct_1 = tf.unstack(hprev)

            fc_gate = tf.matmul(weight, tf.transpose(tf.concat([x, st_1], -1)))
            fc_gate = tf.transpose(fc_gate) + bias
            i, f, g, o = tf.split(fc_gate, 4, axis=1)
            i, f, g, o = tf.sigmoid(i), tf.sigmoid(f), tf.tanh(g), tf.sigmoid(
                o)
            ct = ct_1 * f + g * i
            st = tf.tanh(ct) * o

            return tf.stack([st, ct])

        states = tf.scan(step,
                         tf.transpose(inps, [1, 0, 2]),
                         initializer=init_state)

        return tf.transpose(states, [1, 2, 0, 3])[0]
Example #26
    def construct_model(self, images, actions, rewards):
        """Build convolutional lstm video predictor using CDNA, or DNA.

    Args:
      images: list of tensors of ground truth image sequences
              there should be a 4D image ?xWxHxC for each timestep
      actions: list of action tensors
               each action should be in the shape ?x1xZ
      rewards: list of reward tensors
               each reward should be in the shape ?x1xZ
    Returns:
      gen_images: predicted future image frames
      gen_rewards: predicted future rewards
      latent_mean: mean of approximated posterior
      latent_std: std of approximated posterior

    Raises:
      ValueError: if more than 1 mask specified for DNA model.
    """
        context_frames = self.hparams.video_num_input_frames
        buffer_size = self.hparams.reward_prediction_buffer_size
        if buffer_size == 0:
            buffer_size = context_frames
        if buffer_size > context_frames:
            raise ValueError(
                "Buffer size is bigger than context frames %d %d." %
                (buffer_size, context_frames))

        batch_size = common_layers.shape_list(images[0])[0]
        ss_func = self.get_scheduled_sample_func(batch_size)

        def process_single_frame(prev_outputs, inputs):
            """Process a single frame of the video."""
            cur_image, input_reward, action = inputs
            time_step, prev_image, prev_reward, frame_buf, lstm_states = prev_outputs

            # Sample from softmax (by argmax). This is a no-op for non-softmax loss.
            prev_image = self.get_sampled_frame(prev_image)

            generated_items = [prev_image]
            groundtruth_items = [cur_image]
            done_warm_start = tf.greater(time_step, context_frames - 1)
            input_image, = self.get_scheduled_sample_inputs(
                done_warm_start, groundtruth_items, generated_items, ss_func)

            # Prediction
            pred_image, lstm_states, _ = self.construct_predictive_tower(
                input_image, None, action, lstm_states, latent)

            if self.hparams.reward_prediction:
                reward_input_image = self.get_sampled_frame(pred_image)
                if self.hparams.reward_prediction_stop_gradient:
                    reward_input_image = tf.stop_gradient(reward_input_image)
                with tf.control_dependencies([time_step]):
                    frame_buf = [reward_input_image] + frame_buf[:-1]
                pred_reward = self.reward_prediction(frame_buf, None, action,
                                                     latent)
                pred_reward = common_video.decode_to_shape(
                    pred_reward, common_layers.shape_list(input_reward),
                    "reward_dec")
            else:
                pred_reward = prev_reward

            time_step += 1
            outputs = (time_step, pred_image, pred_reward, frame_buf,
                       lstm_states)

            return outputs

        # Latent tower
        latent = None
        if self.hparams.stochastic_model:
            latent_mean, latent_std = self.construct_latent_tower(images,
                                                                  time_axis=0)
            latent = common_video.get_gaussian_tensor(latent_mean, latent_std)

        # HACK: Do first step outside to initialize all the variables

        lstm_states = [None] * (5 if self.hparams.small_mode else 7)
        frame_buffer = [tf.zeros_like(images[0])] * buffer_size
        inputs = images[0], rewards[0], actions[0]
        init_image_shape = common_layers.shape_list(images[0])
        if self.is_per_pixel_softmax:
            init_image_shape[-1] *= 256
        init_image = tf.zeros(init_image_shape, dtype=images.dtype)
        prev_outputs = (tf.constant(0), init_image, tf.zeros_like(rewards[0]),
                        frame_buffer, lstm_states)

        initializers = process_single_frame(prev_outputs, inputs)
        first_gen_images = tf.expand_dims(initializers[1], axis=0)
        first_gen_rewards = tf.expand_dims(initializers[2], axis=0)

        inputs = (images[1:-1], rewards[1:-1], actions[1:-1])

        outputs = tf.scan(process_single_frame, inputs, initializers)
        gen_images, gen_rewards = outputs[1:3]

        gen_images = tf.concat((first_gen_images, gen_images), axis=0)
        gen_rewards = tf.concat((first_gen_rewards, gen_rewards), axis=0)

        if self.hparams.stochastic_model:
            return gen_images, gen_rewards, [latent_mean], [latent_std]
        else:
            return gen_images, gen_rewards, None, None
Example #27
def define_ppo_epoch(memory, hparams, action_space, batch_size,
                     distributional_size=1, distributional_subscale=0.04,
                     distributional_threshold=0.0, epoch=-1):
  """PPO epoch."""
  observation, reward, done, action, old_pdf, value_sm = memory

  # This is to avoid propagating gradients through simulated environment.
  observation = tf.stop_gradient(observation)
  action = tf.stop_gradient(action)
  reward = tf.stop_gradient(reward)
  if hasattr(hparams, "rewards_preprocessing_fun"):
    reward = hparams.rewards_preprocessing_fun(reward)
  done = tf.stop_gradient(done)
  value_sm = tf.stop_gradient(value_sm)
  old_pdf = tf.stop_gradient(old_pdf)

  value = value_sm
  if distributional_size > 1:
    value = _distributional_to_value(
        value_sm, distributional_size, distributional_subscale,
        distributional_threshold)

  advantage = calculate_generalized_advantage_estimator(
      reward, value, done, hparams.gae_gamma, hparams.gae_lambda)

  if distributional_size > 1:
    # Create discounted reward values range.
    half = distributional_size // 2
    value_range = tf.to_float(tf.range(-half, half)) + 0.5  # Mid-bucket value.
    value_range *= distributional_subscale
    # Acquire new discounted rewards by using the above range as end-values.
    end_values = tf.expand_dims(value_range, 0)
    discounted_reward = discounted_rewards(
        reward, done, hparams.gae_gamma, end_values)
    # Re-normalize the discounted rewards to integers, in [0, dist_size] range.
    discounted_reward /= distributional_subscale
    discounted_reward += half
    discounted_reward = tf.maximum(discounted_reward, 0.0)
    discounted_reward = tf.minimum(discounted_reward, distributional_size)
    # Multiply the rewards by 2 for greater fidelity and round to integers.
    discounted_reward = tf.stop_gradient(tf.round(2 * discounted_reward))
    # The probabilities corresponding to the end values from old predictions.
    discounted_reward_prob = tf.stop_gradient(value_sm[-1])
    discounted_reward_prob = tf.nn.softmax(discounted_reward_prob, axis=-1)
  else:
    discounted_reward = tf.stop_gradient(advantage + value[:-1])
    discounted_reward_prob = discounted_reward  # Unused in this case.

  advantage_mean, advantage_variance = tf.nn.moments(advantage, axes=[0, 1],
                                                     keep_dims=True)
  advantage_normalized = tf.stop_gradient(
      (advantage - advantage_mean)/(tf.sqrt(advantage_variance) + 1e-8))

  add_lists_elementwise = lambda l1, l2: [x + y for x, y in zip(l1, l2)]

  number_of_batches = ((hparams.epoch_length-1) * hparams.optimization_epochs
                       // hparams.optimization_batch_size)
  epoch_length = hparams.epoch_length
  if hparams.effective_num_agents is not None:
    number_of_batches *= batch_size
    number_of_batches //= hparams.effective_num_agents
    epoch_length //= hparams.effective_num_agents

  assert number_of_batches > 0, "Set the parameters so that number_of_batches > 0"
  lr = learning_rate.learning_rate_schedule(hparams)

  shuffled_indices = [tf.random.shuffle(tf.range(epoch_length - 1))
                      for _ in range(hparams.optimization_epochs)]
  shuffled_indices = tf.concat(shuffled_indices, axis=0)
  shuffled_indices = shuffled_indices[:number_of_batches *
                                      hparams.optimization_batch_size]
  indices_of_batches = tf.reshape(shuffled_indices,
                                  shape=(-1, hparams.optimization_batch_size))
  input_tensors = [observation, action, discounted_reward,
                   discounted_reward_prob, advantage_normalized, old_pdf]

  ppo_step_rets = tf.scan(
      lambda a, i: add_lists_elementwise(  # pylint: disable=g-long-lambda
          a, define_ppo_step(
              [tf.gather(t, indices_of_batches[i, :]) for t in input_tensors],
              hparams, action_space, lr,
              epoch=epoch,
              distributional_size=distributional_size,
              distributional_subscale=distributional_subscale
          )),
      tf.range(number_of_batches),
      [0., 0., 0.],
      parallel_iterations=1)

  ppo_summaries = [tf.reduce_mean(ret) / number_of_batches
                   for ret in ppo_step_rets]
  ppo_summaries.append(lr)
  summaries_names = [
      "policy_loss", "value_loss", "entropy_loss", "learning_rate"
  ]

  summaries = [tf.summary.scalar(summary_name, summary)
               for summary_name, summary in zip(summaries_names, ppo_summaries)]
  losses_summary = tf.summary.merge(summaries)

  for summary_name, summary in zip(summaries_names, ppo_summaries):
    losses_summary = tf.Print(losses_summary, [summary], summary_name + ": ")

  return losses_summary
Example #28
        def regular_log_prob_fn(params):
            if regression_hier_type == "none":
                beta = params
                beta_scaled = beta
            elif regression_hier_type == "centered":
                mu_0 = params[Ellipsis, -1]
                tau_0 = tf.nn.softplus(params[Ellipsis, -2])
                beta = params[Ellipsis, :-2]
                beta_scaled = beta
            elif regression_hier_type == "non_centered":
                mu_0 = params[Ellipsis, -1]
                tau_0 = tf.nn.softplus(params[Ellipsis, -2])
                beta = params[Ellipsis, :-2]
                beta_scaled = beta / tf.expand_dims(
                    tau_0, -1) + tf.expand_dims(mu_0, -1)
            else:
                raise ValueError("Unknown regression_hier_type:" +
                                 regression_hier_type)

            if batch_size:

                def body(_, i):
                    y_dist = tfd.Categorical(logits=tf.einsum(
                        "ij,kjm->kim", x[i:i + batch_size],
                        tf.reshape(beta_scaled,
                                   [-1, num_features, num_classes])))
                    return tf.reduce_sum(y_dist.log_prob(y[i:i + batch_size]),
                                         -1)

                log_prob = tf.reduce_sum(
                    tf.scan(body,
                            tf.range(0, x.shape[0], batch_size),
                            initializer=tf.zeros(tf.shape(params)[:1]),
                            parallel_iterations=1), 0)
            else:
                y_dist = tfd.Categorical(logits=tf.einsum(
                    "ij,kjm->kim", x,
                    tf.reshape(beta_scaled, [-1, num_features, num_classes])))
                log_prob = tf.reduce_sum(y_dist.log_prob(y), -1)

            def make_beta_dist(loc, scale):
                if regression_beta_prior == "normal":
                    return tfd.Normal(loc=loc, scale=scale)
                else:
                    if tf.convert_to_tensor(loc).shape.ndims == 0:
                        loc = tf.fill(
                            tf.stack([
                                tf.shape(params)[0], num_features * num_classes
                            ]), loc)
                    if tf.convert_to_tensor(scale).shape.ndims == 0:
                        scale = tf.fill(
                            tf.stack([
                                tf.shape(params)[0], num_features * num_classes
                            ]), scale)

                    scale = tf.linalg.LinearOperatorDiag(scale)
                    return tfd.MultivariateStudentTLinearOperator(loc=loc,
                                                                  scale=scale,
                                                                  df=t_dof)

            if regression_hier_type == "none":
                beta_dist = make_beta_dist(loc=0.0, scale=10.0)
            else:
                mu_0_dist = tfd.Normal(loc=0.0, scale=10.0)
                tau_0_dist = tfd.Gamma(2.0, 1.0)
                log_prob += mu_0_dist.log_prob(mu_0) + tau_0_dist.log_prob(
                    tau_0)

                if regression_hier_type == "centered":
                    mu_0 = tf.tile(tf.expand_dims(mu_0, -1),
                                   [1, num_features * num_classes])
                    tau_0 = tf.tile(tf.expand_dims(tau_0, -1),
                                    [1, num_features * num_classes])
                    beta_dist = make_beta_dist(loc=mu_0, scale=1.0 / tau_0)
                elif regression_hier_type == "non_centered":
                    beta_dist = make_beta_dist(loc=0.0, scale=1.0)
            log_prob += tf.reduce_sum(beta_dist.log_prob(beta), -1)
            return log_prob
Example #29
def vtrace_from_importance_weights(log_rhos,
                                   discounts,
                                   rewards,
                                   values,
                                   bootstrap_value,
                                   clip_rho_threshold=1.0,
                                   clip_pg_rho_threshold=1.0,
                                   name='vtrace_from_importance_weights'):
    r"""V-trace from log importance weights.

  Calculates V-trace actor critic targets as described in

  "IMPALA: Scalable Distributed Deep-RL with
  Importance Weighted Actor-Learner Architectures"
  by Espeholt, Soyer, Munos et al.

  In the notation used throughout documentation and comments, T refers to the
  time dimension ranging from 0 to T-1. B refers to the batch size and
  NUM_ACTIONS refers to the number of actions. This code also supports the
  case where all tensors have the same number of additional dimensions, e.g.,
  `rewards` is `[T, B, C]`, `values` is `[T, B, C]`, `bootstrap_value`
  is `[B, C]`.

  Args:
    log_rhos: A float32 tensor of shape `[T, B, NUM_ACTIONS]` representing the
      log importance sampling weights, i.e.
      log(target_policy(a) / behaviour_policy(a)). V-trace performs operations
      on rhos in log-space for numerical stability.
    discounts: A float32 tensor of shape `[T, B]` with discounts encountered
      when following the behaviour policy.
    rewards: A float32 tensor of shape `[T, B]` containing rewards generated by
      following the behaviour policy.
    values: A float32 tensor of shape `[T, B]` with the value function estimates
      wrt. the target policy.
    bootstrap_value: A float32 of shape `[B]` with the value function estimate
      at time T.
    clip_rho_threshold: A scalar float32 tensor with the clipping threshold for
      importance weights (rho) when calculating the baseline targets (vs).
      rho^bar in the paper. If None, no clipping is applied.
    clip_pg_rho_threshold: A scalar float32 tensor with the clipping threshold
      on rho_s in \rho_s \delta log \pi(a|x) (r + \gamma v_{s+1} - V(x_s)). If
      None, no clipping is applied.
    name: The name scope that all V-trace operations will be created in.

  Returns:
    A VTraceReturns namedtuple (vs, pg_advantages) where:
      vs: A float32 tensor of shape `[T, B]`. Can be used as target to
        train a baseline (V(x_t) - vs_t)^2.
      pg_advantages: A float32 tensor of shape `[T, B]`. Can be used as the
        advantage in the calculation of policy gradients.
  """
    log_rhos = tf.convert_to_tensor(log_rhos, dtype=tf.float32)
    discounts = tf.convert_to_tensor(discounts, dtype=tf.float32)
    rewards = tf.convert_to_tensor(rewards, dtype=tf.float32)
    values = tf.convert_to_tensor(values, dtype=tf.float32)
    bootstrap_value = tf.convert_to_tensor(bootstrap_value, dtype=tf.float32)
    if clip_rho_threshold is not None:
        clip_rho_threshold = tf.convert_to_tensor(clip_rho_threshold,
                                                  dtype=tf.float32)
    if clip_pg_rho_threshold is not None:
        clip_pg_rho_threshold = tf.convert_to_tensor(clip_pg_rho_threshold,
                                                     dtype=tf.float32)

    # Make sure tensor ranks are consistent.
    rho_rank = log_rhos.shape.ndims  # Usually 2.
    values.shape.assert_has_rank(rho_rank)
    bootstrap_value.shape.assert_has_rank(rho_rank - 1)
    discounts.shape.assert_has_rank(rho_rank)
    rewards.shape.assert_has_rank(rho_rank)
    if clip_rho_threshold is not None:
        clip_rho_threshold.shape.assert_has_rank(0)
    if clip_pg_rho_threshold is not None:
        clip_pg_rho_threshold.shape.assert_has_rank(0)

    with tf.name_scope(
            name,
            values=[log_rhos, discounts, rewards, values, bootstrap_value]):
        rhos = tf.exp(log_rhos)
        if clip_rho_threshold is not None:
            clipped_rhos = tf.minimum(clip_rho_threshold,
                                      rhos,
                                      name='clipped_rhos')
        else:
            clipped_rhos = rhos

        cs = tf.minimum(1.0, rhos, name='cs')
        # Append bootstrapped value to get [v1, ..., v_t+1]
        values_t_plus_1 = tf.concat(
            [values[1:], tf.expand_dims(bootstrap_value, 0)], axis=0)
        deltas = clipped_rhos * (rewards + discounts * values_t_plus_1 -
                                 values)

        # Note that all sequences are reversed, computation starts from the back.
        sequences = (
            tf.reverse(discounts, axis=[0]),
            tf.reverse(cs, axis=[0]),
            tf.reverse(deltas, axis=[0]),
        )

        # V-trace vs are calculated through a scan from the back to the beginning
        # of the given trajectory.
        def scanfunc(acc, sequence_item):
            discount_t, c_t, delta_t = sequence_item
            return delta_t + discount_t * c_t * acc

        initial_values = tf.zeros_like(bootstrap_value)
        vs_minus_v_xs = tf.scan(fn=scanfunc,
                                elems=sequences,
                                initializer=initial_values,
                                parallel_iterations=1,
                                back_prop=False,
                                name='scan')
        # Reverse the results back to original order.
        vs_minus_v_xs = tf.reverse(vs_minus_v_xs, [0], name='vs_minus_v_xs')

        # Add V(x_s) to get v_s.
        vs = tf.add(vs_minus_v_xs, values, name='vs')

        # Advantage for policy gradient.
        vs_t_plus_1 = tf.concat(
            [vs[1:], tf.expand_dims(bootstrap_value, 0)], axis=0)
        if clip_pg_rho_threshold is not None:
            clipped_pg_rhos = tf.minimum(clip_pg_rho_threshold,
                                         rhos,
                                         name='clipped_pg_rhos')
        else:
            clipped_pg_rhos = rhos
        pg_advantages = (clipped_pg_rhos *
                         (rewards + discounts * vs_t_plus_1 - values))

        # Make sure no gradients backpropagated through the returned values.
        return VTraceReturns(vs=tf.stop_gradient(vs),
                             pg_advantages=tf.stop_gradient(pg_advantages))
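A toy shape-check sketch (illustration only; assumes the same TF1-style API and that VTraceReturns is the collections.namedtuple('VTraceReturns', 'vs pg_advantages') defined alongside this function in the original IMPALA code):

import collections
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

VTraceReturns = collections.namedtuple('VTraceReturns', 'vs pg_advantages')

log_rhos = tf.zeros([4, 2])        # [T, B]
discounts = tf.fill([4, 2], 0.99)  # [T, B]
rewards = tf.ones([4, 2])          # [T, B]
values = tf.zeros([4, 2])          # [T, B]
bootstrap_value = tf.zeros([2])    # [B]

returns = vtrace_from_importance_weights(
    log_rhos, discounts, rewards, values, bootstrap_value)
with tf.Session() as sess:
    vs, pg_adv = sess.run([returns.vs, returns.pg_advantages])
    print(vs.shape, pg_adv.shape)  # (4, 2) (4, 2)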
Example #30
def shuffle_network(inputs, hparams):
  """Neural Shuffle-Network with skip connections between blocks.

  Args:
    inputs: inputs to the Shuffle-Exchange network. The length should be a power
      of 2.
    hparams: Model configuration

  Returns:
    tf.Tensor: Outputs of the Shuffle-Exchange last layer
  """

  def forward_step(state, layer_nr):
    with tf.variable_scope("forward"):
      last_state, residuals = state
      prev = residuals[layer_nr, :, :, :]
      switch = SwitchLayer("switch", hparams.dropout, hparams.mode)
      cur = switch(last_state, prev)
      return shuffle_layer(cur), residuals

  def reverse_step(state, layer_nr):
    with tf.variable_scope("reverse"):
      last_state, residuals = state
      prev = residuals[layer_nr, :, :, :]
      switch = SwitchLayer("reverse_switch", hparams.dropout, hparams.mode)
      cur = switch(last_state, prev)
      return reverse_shuffle_layer(cur), residuals

  input_shape = tf.shape(inputs)
  n_bits = tf.log(tf.cast(input_shape[1] - 1, tf.float32)) / tf.log(2.0)
  n_bits = tf.cast(n_bits, tf.int32) + 1

  queue_shape = [n_bits * 2, input_shape[0], input_shape[1], input_shape[2]]
  residuals_queue = tf.zeros(queue_shape)
  block_out = tf.tanh(inputs)

  for k in range(hparams.num_hidden_layers):
    with tf.variable_scope("benes_block_" + str(k), reuse=tf.AUTO_REUSE):
      forward_outputs, _ = tf.scan(
          forward_step,
          tf.range(0, n_bits),
          initializer=(block_out, residuals_queue),
          parallel_iterations=1,
          swap_memory=True)

      forward_tensors = [tf.expand_dims(block_out, axis=0), forward_outputs]
      forward_outputs = tf.concat(forward_tensors, axis=0)
      forward_last = forward_outputs[-1, :, :, :]

      reverse_outputs, _ = tf.scan(
          reverse_step,
          tf.range(n_bits, n_bits * 2),
          initializer=(forward_last, residuals_queue),
          parallel_iterations=1,
          swap_memory=True)

      block_out = reverse_outputs[-1, :, :, :]
      residuals_queue = tf.concat([forward_outputs, reverse_outputs], axis=0)

  last_layer = SwitchLayer("last_layer", hparams.dropout, hparams.mode)
  return last_layer(block_out, residuals_queue[n_bits * 2, :, :, :])