Example No. 1
    def broadcast_compute_tree_posterior_M(self, likelihood_AxSxM, leafnode_num):
        """
        Forms a log-probability measure by dotting the stationary probabilities with the
        tree likelihood and adding the log-prior of the tree topology.
        NOTE: the log-prior of branch lengths is added in body_update_weights.
        """
        #with tf.device('/gpu:1'): 
        tree_likelihood_SxM = tf.einsum('ia,asm->sm',self.stationary_probs, likelihood_AxSxM)
        tree_likelihood_S = tf.reduce_mean(tree_likelihood_SxM, axis=1)
        data_loglik = tf.reduce_sum(tf.log(tree_likelihood_S))
        tree_logprior = -log_double_factorial(2 * tf.maximum(leafnode_num, 2) - 3)

        return data_loglik + tree_logprior
Example No. 2
def embedding_lookup(embeddings, indices, implementation='lookup'):
    """Different types of embedding approaches."""
    if implementation == 'lookup':
        return tf.nn.embedding_lookup(embeddings, indices)
    elif implementation == 'matmul':
        onehot = tf.one_hot(indices,
                            depth=embeddings.shape[0].value,
                            axis=-1,
                            dtype=embeddings.dtype)
        return tf.einsum('BLV,VD->BLD', onehot, embeddings)
    else:
        raise ValueError('Unsupported embedding lookup implementation %s' %
                         implementation)
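
The two branches are interchangeable. Below is a minimal check of the one-hot/einsum path against tf.nn.embedding_lookup (a sketch, assuming TensorFlow 2.x eager execution; the listed function itself relies on the TF1-style .value attribute):

import tensorflow as tf

embeddings = tf.random.normal([10, 4])                          # [V, D] table
indices = tf.constant([[1, 2, 3], [4, 5, 6]])                   # [B, L] ids

gathered = tf.nn.embedding_lookup(embeddings, indices)          # [B, L, D]
onehot = tf.one_hot(indices, depth=10, dtype=embeddings.dtype)  # [B, L, V]
via_einsum = tf.einsum('BLV,VD->BLD', onehot, embeddings)       # [B, L, D]

print(tf.reduce_max(tf.abs(gathered - via_einsum)).numpy())     # ~0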
Example No. 3
    def __call__(self, inputs, *args, **kwargs):
        """Call MeanShift."""
        std = tf.convert_to_tensor(self.rgb_std, dtype=tf.float32)
        self.weight = tf.eye(3)
        self.weight = tf.div(self.weight, std)
        self.bias = self.sign * self.rgb_range * tf.convert_to_tensor(
            self.rgb_mean, dtype=tf.float32)
        self.bias = tf.div(self.bias, std)
        res = tf.einsum('ij,njhw->nihw', self.weight, inputs)
        res = tf.transpose(res, [0, 2, 3, 1])
        res = tf.nn.bias_add(res, self.bias)
        res = tf.transpose(res, [0, 3, 1, 2])
        return res
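
The einsum 'ij,njhw->nihw' applies a channel-mixing matrix to an NCHW tensor; with the diagonal weight built above it reduces to per-channel scaling. A small check (a sketch, assuming TF 2.x eager execution; shapes are illustrative):

import tensorflow as tf

x = tf.random.normal([2, 3, 4, 4])       # NCHW input
std = tf.constant([0.5, 1.0, 2.0])
weight = tf.eye(3) / std                 # same diagonal construction as above

out = tf.einsum('ij,njhw->nihw', weight, x)
print(tf.reduce_max(tf.abs(out - x / std[None, :, None, None])).numpy())  # ~0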
Example No. 4
    def gamma_scales_log_prob_fn(params):
      assert num_classes == 2

      def unmarshal(params):
        results = []
        n_dimensions_used = 0
        if regression_use_beta_scales:
          dim_list = [num_features, num_features, 1]
        else:
          dim_list = [num_features, 1]
        for n_to_add in dim_list:
          results.append(
              params[Ellipsis, n_dimensions_used:n_dimensions_used + n_to_add])
          n_dimensions_used += n_to_add
        return tuple(results)

      log_prob = 0.
      if regression_use_beta_scales:
        beta, beta_log_scales, overall_log_scale = unmarshal(params)
        # p(per-variable scales)
        log_prob += tf.reduce_sum(
            tfd.TransformedDistribution(
                tfd.Gamma(0.5, 0.5),
                tfb.Invert(tfb.Exp())).log_prob(beta_log_scales), -1)
      else:
        beta, overall_log_scale = unmarshal(params)
        beta_log_scales = 0.0
      # p(overall scale)
      log_prob += tf.reduce_sum(
          tfd.Normal(0., 10.).log_prob(overall_log_scale), -1)
      # p(beta)
      log_prob += tf.reduce_sum(tfd.Normal(0., 1.).log_prob(beta), -1)
      # p(y | x, beta)
      scaled_beta = beta * tf.exp(overall_log_scale) * tf.exp(beta_log_scales)
      if batch_size:

        def body(_, i):
          logits = tf.einsum("nd,md->mn", x[i:i + batch_size], scaled_beta)
          return tf.reduce_sum(
              tfd.Bernoulli(logits=logits).log_prob(y[i:i + batch_size]), -1)

        log_prob += tf.reduce_sum(
            tf.scan(
                body,
                tf.range(0, x.shape[0], batch_size),
                initializer=tf.zeros(tf.shape(params)[:1]),
                parallel_iterations=1), 0)
      else:
        logits = tf.einsum("nd,md->mn", x, scaled_beta)
        log_prob += tf.reduce_sum(tfd.Bernoulli(logits=logits).log_prob(y), -1)
      return log_prob
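
The "nd,md->mn" pattern evaluates the logits of every parameter sample (row of scaled_beta) against every data point in one call; the same pattern appears in Examples No. 19 and 25. A shape-only sketch (assuming TF 2.x eager execution; names are illustrative):

import tensorflow as tf

x = tf.random.normal([100, 7])           # [num_examples, num_features]
scaled_beta = tf.random.normal([3, 7])   # [num_chains, num_features]
logits = tf.einsum('nd,md->mn', x, scaled_beta)
print(logits.shape)                      # (3, 100): one row of logits per chain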
Example No. 5
def trail_dense(x,
                output_shape,
                begin_axis=-1,
                bias=True,
                name=None,
                kernel_initializer=WEIGHT_INITIALIZER,
                bias_initializer=BIAS_INITIALIZER):
    """A dense layer that projects x[begin_axis:] to output_shape."""
    if isinstance(output_shape, int):
        output_shape = [output_shape]
    else:
        output_shape = list(output_shape)

    input_shape = x.shape.as_list()
    input_rank = len(input_shape)
    shared_size = begin_axis % input_rank
    i_only_size = input_rank - shared_size
    o_only_size = len(output_shape)

    assert input_rank + o_only_size < len(string.ascii_lowercase)
    einsum_str = string.ascii_lowercase[:input_rank + o_only_size]

    offset = 0
    shared_str = einsum_str[offset:offset + shared_size]
    offset += shared_size
    i_only_str = einsum_str[offset:offset + i_only_size]
    offset += i_only_size
    o_only_str = einsum_str[offset:offset + o_only_size]

    input_str = '{}{}'.format(shared_str, i_only_str)
    output_str = '{}{}'.format(shared_str, o_only_str)
    weight_str = '{}{}'.format(i_only_str, o_only_str)
    weight_shape = input_shape[begin_axis:] + output_shape

    # Actual computation
    with tf.variable_scope(name, default_name='dense'):
        weight = tf.get_variable('weight',
                                 shape=weight_shape,
                                 initializer=kernel_initializer,
                                 dtype=x.dtype)
        einsum_expr = '{},{}->{}'.format(input_str, weight_str, output_str)
        output = tf.einsum(einsum_expr, x, weight)

        if bias:
            bias = tf.get_variable('bias',
                                   shape=output_shape,
                                   initializer=bias_initializer,
                                   dtype=x.dtype)
            output += bias

    return output
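
For a rank-3 input with begin_axis=-1 the construction above yields the familiar 'abc,cd->abd' projection; a multi-axis output_shape simply appends more output letters. A plain-Python trace of the string building (a sketch; shapes are illustrative):

import string

input_shape = [32, 128, 512]        # [batch, seq, hidden]
output_shape = [8, 64]              # project the last axis into two new axes
begin_axis = -1

input_rank = len(input_shape)
shared_size = begin_axis % input_rank        # 2 axes passed through
i_only_size = input_rank - shared_size       # 1 axis contracted away
o_only_size = len(output_shape)              # 2 axes produced

letters = string.ascii_lowercase[:input_rank + o_only_size]       # 'abcde'
shared = letters[:shared_size]
i_only = letters[shared_size:shared_size + i_only_size]
o_only = letters[shared_size + i_only_size:]
print('{}{},{}{}->{}{}'.format(shared, i_only, i_only, o_only, shared, o_only))
# abc,cde->abde
print(input_shape[begin_axis:] + output_shape)   # weight shape: [512, 8, 64]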
Example No. 6
            def two_step_gather_per_level(features_level, mask):
                """Performs two-step gather using einsum for every level of features."""
                (_, feature_height, feature_width,
                 _) = features_level.get_shape().as_list()
                boundaries = tf.tile(
                    tf.expand_dims(
                        tf.expand_dims([feature_height, feature_width], 0), 0),
                    [batch_size, num_boxes, 1])
                boundaries = tf.cast(boundaries, boxes.dtype)
                kernel_y, kernel_x, box_gridy0y1, box_gridx0x1 = compute_grid_positions(
                    boxes, boundaries, output_size, sample_offset=0.5)

                # shape is:
                # [batch_size, num_boxes, output_size, 2, spatial_size]
                box_grid_y_one_hot, box_grid_x_one_hot = get_grid_one_hot(
                    box_gridy0y1, box_gridx0x1, feature_height, feature_width)

                # shape is [batch_size, num_boxes, output_size, spatial_size]
                box_grid_y_weight = tf.reduce_sum(tf.multiply(
                    box_grid_y_one_hot, kernel_y),
                                                  axis=-2)
                box_grid_x_weight = tf.reduce_sum(tf.multiply(
                    box_grid_x_one_hot, kernel_x),
                                                  axis=-2)

                # shape is [batch_size, num_boxes, output_size, width, feature]
                y_outputs = tf.einsum(
                    'bhwf,bnyh->bnywf', features_level,
                    tf.cast(box_grid_y_weight, dtype=features_level.dtype))

                # shape is [batch_size, num_boxes, output_size, output_size, feature]
                x_outputs = tf.einsum(
                    'bnywf,bnxw->bnyxf', y_outputs,
                    tf.cast(box_grid_x_weight, dtype=features_level.dtype))

                outputs = tf.where(tf.equal(mask, tf.zeros_like(mask)),
                                   tf.zeros_like(x_outputs), x_outputs)
                return outputs
Example No. 7
def dot_product_attention(q, k,
                          v=None,
                          scale=1.,
                          normalize=True,
                          weights_only=False,
                          hard=False):
  """Computes dot product attention.

  Args:
    q: queries. Tensor of shape [B, m, d_k].
    k: keys. Tensor of shape [B, n, d_k].
    v: values. Tensor of shape [B, n, d_v]. Can be None if weights_only=True.
    scale: Attention hyperparameter that scales the dot product. Tensor of
      shape [B].
    normalize: Boolean; if True, use softmax so weights sum to 1, else sigmoid.
    weights_only: Boolean; if True, return only the attention weights.
    hard: Boolean; if True, use one-hot argmax weights instead of soft weights.

  Returns:
    Tensor of shape [B, m, d_v], or attention weights of shape [B, m, n] if
    weights_only is True.
  """
  d_k = tf.shape(q)[-1]
  scale = tf.reshape(scale * tf.sqrt(tf.cast(d_k, tf.float32)),
                     [-1, 1, 1])  # [B, 1, 1]
  unnorm_weights = tf.einsum('bjk,bik->bij', k, q) / scale  # [B, m, n]
  if normalize:
    weight_fn = tf.nn.softmax
  else:
    weight_fn = tf.sigmoid
  weights = weight_fn(unnorm_weights)  # [B, m, n]
  if hard:
    weights = tf.one_hot(
        tf.math.argmax(weights, axis=-1),
        depth=tf.shape(k)[1],
        axis=-1)
  if weights_only:
    return weights
  rep = tf.einsum('bik,bkj->bij', weights, v)  # [B, m, d_v]
  return rep
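
A quick shape check of the attention helper (a sketch, assuming TF 2.x eager execution):

import tensorflow as tf

q = tf.random.normal([2, 5, 16])    # [B, m, d_k]
k = tf.random.normal([2, 7, 16])    # [B, n, d_k]
v = tf.random.normal([2, 7, 32])    # [B, n, d_v]

rep = dot_product_attention(q, k, v)
print(rep.shape)                    # (2, 5, 32)

w = dot_product_attention(q, k, weights_only=True)
print(w.shape)                      # (2, 5, 7); rows sum to 1 since normalize=True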
Example No. 8
  def lm_logits(self, hidden, lookup_table=None, mapping=None, scope="lm"):
    """Compute logits for language modeling cross entropy loss."""
    net_config = self.net_config
    initializer = self.get_initializer()

    # Extract relevant hidden states
    if mapping is not None:
      hidden = tf.einsum("...id,...ki->...kd", hidden, mapping)

    # Apply one more non-linear transformation before the output layer.
    # This matrix is not used after pre-training.
    with tf.variable_scope("{}_proj".format(scope)):
      hidden = ops.dense(
          hidden,
          out_shape=net_config.d_embed,
          inp_shape=net_config.d_model,
          activation=ops.get_activation(net_config.ff_activation),
          initializer=initializer)
      hidden = ops.layer_norm_op(hidden, norm_shape=[net_config.d_embed])

    with tf.variable_scope("{}_loss".format(scope)):
      if lookup_table is not None:
        softmax_w = lookup_table
      else:
        softmax_w = tf.get_variable("weight",
                                    [net_config.vocab_size, net_config.d_embed],
                                    dtype=hidden.dtype, initializer=initializer)

      softmax_b = tf.get_variable("bias", [net_config.vocab_size],
                                  dtype=hidden.dtype,
                                  initializer=tf.zeros_initializer())

      logits = tf.einsum("...d,nd->...n", hidden, softmax_w) + softmax_b
      if logits.dtype != tf.float32:
        # Always use float32 for LM loss
        logits = tf.cast(logits, tf.float32)

    return logits
Example No. 9
    def __init__(self, num_classes, batch_size, num_steps, num_inputs,
                       rnn_size, num_layers, learning_rate, dataset_name,
                       c_k, grad_clip=5, sampling=False):
    
        # if sampling is True, use SGD with only 1 sample
        if sampling:
            batch_size = num_steps * 1

        tf.reset_default_graph()
        
        # input layer
        self.inputs, self.targets, self.keep_prob = build_inputs(num_steps, num_classes, num_inputs)

        # rnn layer
        # from f_gate_cell import forget_cell

        cell, self.initial_state = build_rnn(rnn_size, num_layers, batch_size, num_steps, self.keep_prob, c_k)
#        cell1, self.initial_state1 = build_rnn(rnn_size, num_layers, batch_size, num_steps, self.keep_prob, c_k)
#        cell2, self.initial_state2 = build_rnn(rnn_size, num_layers, batch_size, num_steps, self.keep_prob, c_k)
        
#        cell = tf.nn.rnn_cell.MultiRNNCell([cell1, cell2])
#        # one-hot coding for inputs
#        x_one_hot = tf.one_hot(self.inputs, num_classes)
        
        # running the RNN
        outputs, state = tf.nn.dynamic_rnn(cell, self.inputs, initial_state=self.initial_state)
#        outputs, state = tf.nn.dynamic_rnn(cell, self.inputs, dtype=tf.float32)
        self.final_state = state
        
        # predicting the results
        self.prediction, self.logits = build_output(state, rnn_size, num_classes, dataset_name, c_k)

#        self.coeff_a = tf.constant([0])
        coeff_a_kernel = tf.reshape(outputs[rnn_size:, -1, :], [c_k, rnn_size])
        coeff_a_eye = tf.eye(rnn_size, batch_shape=[c_k])
        self.coeff_a = tf.reshape(
            tf.transpose(tf.einsum('ij,ijk->ijk', coeff_a_kernel, coeff_a_eye), perm=[1, 0, 2]),
            [rnn_size, rnn_size * c_k])
        self.coeff_a = tf.concat(
            [self.coeff_a[:, :rnn_size] + outputs[:rnn_size, -1, :], self.coeff_a[:, rnn_size:]], 1)
        if (c_k > 1) :
            self.coeff_a = tf.concat([self.coeff_a, 
                                     tf.convert_to_tensor(np.kron(np.eye(c_k-1, M=c_k), np.eye(rnn_size)), dtype=tf.float32)], 0)
        
        # Loss and optimizer (with gradient clipping)
        self.loss_nn, self.regularizer = build_loss(self.logits, self.targets, rnn_size, num_classes, self.coeff_a, c_k)
        self.loss = self.loss_nn + self.regularizer
        
        self.optimizer = build_optimizer(self.loss, learning_rate, grad_clip)
        
        self.accuracy, self.accuracy_op = tf.metrics.accuracy(labels=tf.argmax(self.targets,1),
                                                              predictions=tf.argmax(self.logits,1), name='accuracy')
Example No. 10
  def test_compare_einsum(self):
    """Batch matrix multiplication test."""

    np.random.seed(42)

    # We're comparing with regular `einsum` so we choose a `min_value`
    # to make underflow impossible.
    a = _random_sparse_tensor([2, 2, 2, 2, 2, 2, 2], min_value=0.1)
    b = _random_sparse_tensor([2, 2, 2, 2, 2, 2, 2], min_value=0.1)
    formula = 'abcdcfg,edfcbaa->bd'
    u = tf.math.log(tf.einsum(formula, a, b))
    v = logeinsumexp(formula, tf.math.log(a), tf.math.log(b))

    self.assertAllClose(u, v)
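
logeinsumexp itself is not listed here, but the identity the test relies on can be written out by hand for a simple two-operand contraction (a sketch, assuming TF 2.x eager execution):

import tensorflow as tf

# log(einsum('ij,jk->ik', exp(A), exp(B))) == logsumexp_j(A[i, j] + B[j, k])
a = tf.random.uniform([3, 4], minval=0.1, maxval=1.0)
b = tf.random.uniform([4, 5], minval=0.1, maxval=1.0)

u = tf.math.log(tf.einsum('ij,jk->ik', a, b))
v = tf.reduce_logsumexp(
    tf.math.log(a)[:, :, None] + tf.math.log(b)[None, :, :], axis=1)
print(tf.reduce_max(tf.abs(u - v)).numpy())   # ~0, up to float error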
Example No. 11
def rearrange(src, dst, t):
  """Reorder dimensions of tensor according to formula."""

  new_indices = ''
  for i in dst:
    if i not in src:
      new_indices += i
  new_src = src + new_indices
  new_t = tf.reshape(t, tf.concat(
      [tf.shape(t), tf.ones(len(new_indices), dtype=tf.int32)], axis=0))
  formula = '{}->{}'.format(new_src, dst)
  # It is safe to use ordinary `einsum` here as no summations
  # are performed.
  return tf.einsum(formula, new_t)
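
Indices that appear only in dst become trailing size-1 axes, so the einsum is a pure permutation plus expand_dims. A usage sketch (assuming TF 2.x eager execution):

import tensorflow as tf

t = tf.zeros([2, 3, 4])              # axes: b, h, w
out = rearrange('bhw', 'bwhc', t)    # swap h/w and append a new size-1 axis c
print(out.shape)                     # (2, 4, 3, 1)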
Example No. 12
def _attention_scores(from_view, to_view, additive_mask, queries, keys):
    """Computes masked attention scores between two views of bucketed tensors."""
    from_buckets = 'N' if from_view in ('tail', 'window') else ''
    to_buckets = 'N' if to_view in ('tail', 'window') else ''
    result_buckets = from_buckets or to_buckets
    # Computes unmasked attention scores. If either from or to views have a
    # num_bucket dimension, we keep it in the output.
    scores = tf.einsum(
        f'BH{from_buckets}FE,BH{to_buckets}TE->BH{result_buckets}FT',
        getattr(queries, from_view),
        getattr(keys, to_view),
        name=f'query_key_{from_view}_{to_view}')

    return scores + additive_mask
Example No. 13
def _reduce_second_m35(m35s, m35c, is_diagonal_35s, seed=0):
    """Reduces the 2nd 35-irrep."""
    diag = numpy.diagonal(m35s if is_diagonal_35s else m35c)
    gens = _get_generators_for_reducing_second_m35(
        diag, 'gsS,sScC->gcC' if is_diagonal_35s else 'gcC,sScC->gsS',
        algebra.spin8.gamma_sscc)
    num_gens = len(gens)
    if num_gens == 0:
        return m35s, m35c  # No residual symmetry to exploit.
    # This residual symmetry is typically rather small.
    # So, doing a direct minimization is perhaps appropriate.
    rng = numpy.random.RandomState(seed=seed)
    v_coeffs_initial = rng.normal(
        scale=1e-3, size=(num_gens, ))  # Break symmetry with noise.
    graph = tf.Graph()
    with graph.as_default():
        tc_gens = tf.constant(gens, dtype=tf.float64)
        tc_m35 = tf.constant(m35c if is_diagonal_35s else m35s,
                             dtype=tf.float64)
        t_coeffs = tf.Variable(initial_value=v_coeffs_initial,
                               trainable=True,
                               dtype=tf.float64)
        t_rot = tf_cexpm.cexpm(tf.einsum('i,iab->ab', t_coeffs, tc_gens),
                               complex_arg=False)
        t_m35_rotated = tf.einsum('Ab,Bb->AB',
                                  tf.einsum('ab,Aa->Ab', tc_m35, t_rot), t_rot)
        # Our 'loss' is the sum of magnitudes of the off-diagonal parts after
        # rotation.
        t_loss = (tf.norm(t_m35_rotated, ord=1) -
                  tf.norm(tf.linalg.diag_part(t_m35_rotated), ord=1))
        optimizer = contrib_opt.ScipyOptimizerInterface(t_loss)
        with tf.compat.v1.Session() as sess:
            sess.run([tf.global_variables_initializer()])
            optimizer.minimize(sess)
            # We are only interested in the diagonalized matrix.
            m_diag = sess.run([t_m35_rotated])[0]
            return (m35s, m_diag) if is_diagonal_35s else (m_diag, m35c)
Example No. 14
def dense_layer_3d(
    input_tensor,
    num_attention_heads,
    head_size,
    initializer,
    activation,
    use_einsum,
    name = None,
):
    """A dense layer with 3D kernel.

  Args:
    input_tensor: float Tensor of shape [batch, seq_length, hidden_size].
    num_attention_heads: Number of attention heads.
    head_size: The size per attention head.
    initializer: Kernel initializer.
    activation: Activation function.
    use_einsum: bool. Whether to use einsum or reshape+matmul for dense layers.
    name: The name scope of this layer.

  Returns:
    float logits Tensor.
  """

    input_shape = get_shape_list(input_tensor)
    hidden_size = input_shape[2]

    with tf.variable_scope(name):
        w = tf.get_variable(
            name = 'kernel',
            shape = [hidden_size, num_attention_heads * head_size],
            initializer = initializer,
        )
        w = tf.reshape(w, [hidden_size, num_attention_heads, head_size])
        b = tf.get_variable(
            name = 'bias',
            shape = [num_attention_heads * head_size],
            initializer = tf.zeros_initializer,
        )
        b = tf.reshape(b, [num_attention_heads, head_size])
        if use_einsum:
            ret = tf.einsum('BFH,HND->BFND', input_tensor, w)
        else:
            ret = einsum_via_matmul(input_tensor, w, 1)
        ret += b
    if activation is not None:
        return activation(ret)
    else:
        return ret
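
einsum_via_matmul is not listed here; the 3D-kernel projection 'BFH,HND->BFND' is equivalent to a reshape + matmul + reshape, which a small check makes explicit (a sketch, assuming TF 2.x eager execution; shapes are illustrative):

import tensorflow as tf

B, F, H, N, D = 2, 3, 8, 4, 5
x = tf.random.normal([B, F, H])
w = tf.random.normal([H, N, D])

via_einsum = tf.einsum('BFH,HND->BFND', x, w)
via_matmul = tf.reshape(
    tf.matmul(tf.reshape(x, [B * F, H]), tf.reshape(w, [H, N * D])),
    [B, F, N, D])
print(tf.reduce_max(tf.abs(via_einsum - via_matmul)).numpy())   # ~0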
Example No. 15
def dense(x,
          out_shape,
          initializer,
          inp_shape=None,
          begin_axis=-1,
          use_bias=True,
          activation=None,
          scope="dense",
          reuse=False):
    """A more flexible dense layer."""
    if isinstance(out_shape, int):
        out_shape = [out_shape]
    if inp_shape is None:
        inp_shape = x.shape.as_list()[begin_axis:]
    elif isinstance(inp_shape, int):
        inp_shape = [inp_shape]

    inp_syms = ["a", "b", "c", "d"]
    out_syms = ["e", "f", "g", "h"]

    prefix = get_einsum_prefix(x.shape.ndims - len(inp_shape))
    inp_str = get_einsum_prefix(len(inp_shape), inp_syms)
    out_str = get_einsum_prefix(len(out_shape), out_syms)

    with tf.variable_scope(scope, reuse=reuse):
        kernel_shape = inp_shape + out_shape
        kernel = tf.get_variable("kernel",
                                 kernel_shape,
                                 dtype=x.dtype,
                                 initializer=initializer)

        output = tf.einsum(
            "{0}{1},{1}{2}->{0}{2}".format(prefix, inp_str, out_str), x,
            kernel)
        print(x.get_shape(), kernel.get_shape(), "==dense shape==", prefix,
              inp_str, out_str, output.get_shape())

        if use_bias:
            bias = tf.get_variable("bias",
                                   out_shape,
                                   dtype=x.dtype,
                                   initializer=tf.zeros_initializer())
            output += bias

        if activation is not None:
            output = activation(output)

    return output
Example No. 16
    def __init__(self,
                 femb_size,
                 hidd_size_enc,
                 hidd_size_sa,
                 seq_size,
                 learning_rate,
                 seed=42):
        self.global_step = tf.Variable(0, trainable=False)
        self.training = tf.placeholder(tf.bool, None)
        self.masked_data = tf.placeholder(tf.float32,
                                          (None, seq_size, seq_size + 1),
                                          name='masked_data')
        self.true_data = tf.placeholder(tf.float32, (None, seq_size),
                                        name='true_data')
        self.masked_indices_mask = tf.placeholder(tf.float32, (None, seq_size),
                                                  name='masked_indices_mask')
        self.feature_emb_matrix = tf.get_variable(
            shape=(seq_size + 1, femb_size),
            initializer=tf.initializers.glorot_normal(seed=seed),
            trainable=True,
            name='feature_emb_matrix')
        embedded_seq = tf.einsum('pe, bsp -> bse', self.feature_emb_matrix,
                                 self.masked_data)

        output_layer = tf.keras.layers.Dense(1)

        if hidd_size_sa > 0:
            sal = SelfAttentionLayer(femb_size, hidd_size_sa, seed)
            saw, hidd_r = sal.call(embedded_seq, self.training)
        else:
            hidd_l = tf.keras.layers.Dense(hidd_size_enc)
            hidd_r = hidd_l(embedded_seq)

        self.reconstructed_seq = tf.squeeze(output_layer(hidd_r), axis=-1)
        self.reconstructed_seq = tf.sigmoid(self.reconstructed_seq)
        self.loss = tf.reduce_sum(
            tf.multiply(tf.abs(self.true_data - self.reconstructed_seq),
                        self.masked_indices_mask)) / tf.reduce_sum(
                            self.masked_indices_mask)

        optimizer = tf.compat.v1.train.AdamOptimizer(
            learning_rate=learning_rate)
        update_ops = tf.compat.v1.get_collection(tf.GraphKeys.UPDATE_OPS)
        train_op = optimizer.minimize(self.loss, global_step=self.global_step)
        self.init_op = tf.group(tf.compat.v1.global_variables_initializer(),
                                tf.compat.v1.local_variables_initializer())
        self.train_op = tf.group([train_op, update_ops])
        self.saver = tf.train.Saver(max_to_keep=None)
Example No. 17
def _locally_connected_ising_model_energy(variables, potentials):
    """1D Ising model with couplings between adjacent variables.

  Args:
    variables: [batch_size, sequence_length] int array (np or Tensor) or
      [batch_size, sequence_length, vocab_size] array (corresponding to one-hot
      vectors).
    potentials: [sequence_length - 1, vocab_size, vocab_size]

  Returns:
    [batch_size] array of energy
  """
    variables = np.asarray(variables, dtype=int)
    vocab_size = potentials.shape[-1]
    oh = _one_hot(variables, depth=vocab_size)
    return tf.einsum('bim,bin,imn->b', oh[:, :-1, :], oh[:, 1:, :], potentials)
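
The contraction 'bim,bin,imn->b' picks out potentials[i, v[b, i], v[b, i + 1]] for every adjacent pair and sums over positions. A small check using tf.one_hot in place of the _one_hot helper, which is not listed here (a sketch, assuming TF 2.x eager execution):

import numpy as np
import tensorflow as tf

batch, seq_len, vocab = 2, 5, 3
variables = np.random.randint(vocab, size=(batch, seq_len))
potentials = np.random.randn(seq_len - 1, vocab, vocab).astype(np.float32)

oh = tf.one_hot(variables, depth=vocab)
energy = tf.einsum('bim,bin,imn->b', oh[:, :-1, :], oh[:, 1:, :], potentials)

expected = [sum(potentials[i, variables[b, i], variables[b, i + 1]]
                for i in range(seq_len - 1)) for b in range(batch)]
print(np.allclose(energy.numpy(), expected))   # True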
Example No. 18
    def call(self, reshaped_inputs, total_token_num):
        """MoE FFN Layer call."""
        combine_tensor, dispatch_mask, aux_loss = \
            self.gate(reshaped_inputs, total_token_num)  # G'SEC

        # dispatched inputs
        dispatch_inputs = tf.einsum("GSEC,GSM->EGCM",
                                    dispatch_mask,
                                    reshaped_inputs,
                                    name="dispatch_inputs")  # EG'CM

        # inter_experts forward
        if FLAGS.op_split:
            with epl.split(device_count=FLAGS.worker_gpu):
                intermediate = tf.einsum(
                    'EGCM,EMH->EGCH',
                    dispatch_inputs,
                    self.inter_experts,
                    name="dispatched_inter_outputs")  # EG'CH
                # activation function
                activated_inters = self.activation_fn(intermediate)  # EG'CH
                # output_experts forward
                output_experts = tf.einsum('EGCH,EHM->EGCM',
                                           activated_inters,
                                           self.out_experts,
                                           name="dispatched_outputs")
                combined_outputs = tf.einsum('GSEC,EGCM->GSM',
                                             combine_tensor,
                                             output_experts,
                                             name="combined_outputs")
        else:
            intermediate = tf.einsum('EGCM,EMH->EGCH',
                                     dispatch_inputs,
                                     self.inter_experts,
                                     name="dispatched_inter_outputs")  # EG'CH
            # activation function
            activated_inters = self.activation_fn(intermediate)  # EG'CH

            # output_experts forward
            output_experts = tf.einsum('EGCH,EHM->EGCM',
                                       activated_inters,
                                       self.out_experts,
                                       name="dispatched_outputs")

            combined_outputs = tf.einsum('GSEC,EGCM->GSM',
                                         combine_tensor,
                                         output_experts,
                                         name="combined_outputs")

        return combined_outputs, aux_loss
Example No. 19
        def horseshoe_log_prob_fn(params):
            assert num_classes == 2

            (z, r1_local, r2_local, r1_global, r2_global) = tf.split(
                params, [num_features, num_features, num_features, 1, 1],
                axis=-1)

            def indep(d):
                return tfd.Independent(d, 1)

            zero = tf.zeros(num_features)
            one = tf.ones(num_features)
            half = 0.5 * one

            p_z = indep(tfd.Normal(zero, one))
            p_r1_local = indep(tfd.HalfNormal(one))
            p_r2_local = indep(tfd.InverseGamma(half, half))

            p_r1_global = indep(tfd.HalfNormal([1.]))
            p_r2_global = indep(tfd.InverseGamma([0.5], [0.5]))

            log_prob = (p_z.log_prob(z) + p_r1_local.log_prob(r1_local) +
                        p_r2_local.log_prob(r2_local) +
                        p_r1_global.log_prob(r1_global) +
                        p_r2_global.log_prob(r2_global))

            lambda_ = r1_local * tf.sqrt(r2_local)
            tau = r1_global * tf.sqrt(r2_global)
            beta = z * lambda_ * tau

            if batch_size:

                def body(_, i):
                    logits = tf.einsum("nd,md->mn", x[i:i + batch_size], beta)
                    return tfd.Independent(tfd.Bernoulli(logits=logits),
                                           1).log_prob(y[i:i + batch_size])

                log_prob += tf.reduce_sum(
                    tf.scan(body,
                            tf.range(0, x.shape[0], batch_size),
                            initializer=tf.zeros(tf.shape(params)[:1]),
                            parallel_iterations=1), 0)
            else:
                logits = tf.einsum("nd,md->mn", x, beta)
                log_prob += tfd.Independent(tfd.Bernoulli(logits=logits),
                                            1).log_prob(y)
            return log_prob
Example No. 20
def dense_layer_2d(
    input_tensor,
    output_size,
    initializer,
    activation,
    use_einsum,
    num_attention_heads = 1,
    name = None,
):
    """A dense layer with 2D kernel.

  Args:
    input_tensor: Float tensor with rank 3.
    output_size: The size of output dimension.
    initializer: Kernel initializer.
    activation: Activation function.
    use_einsum: bool. Whether to use einsum or reshape+matmul for dense layers.
    num_attention_heads: number of attention head in attention layer.
    name: The name scope of this layer.

  Returns:
    float logits Tensor.
  """
    del num_attention_heads  # unused
    input_shape = get_shape_list(input_tensor)
    hidden_size = input_shape[2]
    with tf.variable_scope(name):
        w = tf.get_variable(
            name = 'kernel',
            shape = [hidden_size, output_size],
            initializer = initializer,
        )
        b = tf.get_variable(
            name = 'bias',
            shape = [output_size],
            initializer = tf.zeros_initializer,
        )
        if use_einsum:
            ret = tf.einsum('BFH,HO->BFO', input_tensor, w)
        else:
            ret = tf.matmul(input_tensor, w)
        ret += b
    if activation is not None:
        return activation(ret)
    else:
        return ret
Example No. 21
def _fully_connected_ising_model_energy(variables, potentials):
    """Ising model with a fully-connected coupling graph.

  Args:
    variables: [batch_size, sequence_length] int array (np or Tensor) or
      [batch_size, sequence_length, vocab_size] array (corresponding to one-hot
      vectors).
    potentials: [sequence_length, sequence_length, vocab_size, vocab_size] float
      array (np or Tensor).

  Returns:
    [batch_size] Tensor of energy.
  """
    variables = np.asarray(variables, dtype=int)
    vocab_size = potentials.shape[-1]
    onehot = _one_hot(variables, depth=vocab_size)
    return tf.einsum('bim,bjn,ijmn->b', onehot, onehot, potentials)
Example No. 22
def main_cost(x, y, kappa):
    yx_diff = tf.expand_dims(y, 1) - tf.expand_dims(x, 0)
    yx_normsq = tf.einsum('ijk,ijk->ij', yx_diff, yx_diff)

    # note that all entries are between 0.0 and 1.0
    yx_gauss = tf.exp(-1.0 * yx_normsq / (2.0 * kappa * kappa))

    yx_gauss_sum = tf.reduce_sum(yx_gauss, axis=0)

    # to protect the log from becoming -inf
    yx_gauss_sum = tf.clip_by_value(yx_gauss_sum,
                                    clip_value_min=1.0e-306,
                                    clip_value_max=1.0e+306)

    yx_cost = -1.0 * kappa * tf.reduce_sum(tf.log(yx_gauss_sum))
    return yx_cost, yx_gauss_sum
Example No. 23
    def call(self, inputs):
        """Implements call() for Dense2DProjection.

    Args:
      inputs: float Tensor of shape [batch, from_seq_length,
        num_attention_heads, size_per_head].

    Returns:
      A 3D Tensor.
    """
        ret = tf.einsum("abc,cd->abd", inputs, self.kernel)
        ret += self.bias
        if self.activation is not None:
            if self.dtype == tf.float16 and self.fp32_activation:
                ret = tf.cast(ret, tf.float32)
            return self.activation(ret)
        return ret
Example No. 24
  def __init__(self, sess, config, name, is_train):
    self.sess = sess
    self.name = name
    self.is_train = is_train


    self.X_hsd = tf.placeholder(tf.float32, shape=[config.batch_size, config.im_size, config.im_size, 3], name="original_color_image")
    self.D, h_s = tf.split(self.X_hsd,[1,2], axis=3)

    self.E_Step = CNN("E_Step", config, is_train=self.is_train)
    self.Gama = self.E_Step(self.D)
    self.loss, self.Mu, self.Std = GMM_M_Step(self.X_hsd, self.Gama, config.ClusterNo, name='GMM_Statistics')
    
    if self.is_train:

      self.optim = tf.train.AdamOptimizer(config.lr)
      self.train = self.optim.minimize(self.loss, var_list=self.E_Step.Param)

    ClsLbl = tf.arg_max(self.Gama, 3)
    ClsLbl = tf.cast(ClsLbl, tf.float32)
    
    ColorTable = [[255,0,0],[0,255,0],[0,0,255],[255,255,0], [0,255,255], [255,0,255]]
    colors = tf.cast(tf.constant(ColorTable), tf.float32)
    Msk = tf.tile(tf.expand_dims(ClsLbl, axis=3),[1,1,1,3])
    for k in range(0, config.ClusterNo):
        ClrTmpl = tf.einsum('anmd,df->anmf', tf.expand_dims(tf.ones_like(ClsLbl), axis=3), tf.reshape(colors[k,...],[1,3]))
        Msk = tf.where(tf.equal(Msk,k), ClrTmpl, Msk)
    
    
    self.X_rgb = utils.HSD2RGB(self.X_hsd)
    tf.summary.image("1.Input_image", self.X_rgb*255.0, max_outputs=2)
    tf.summary.image("2.Gamma_image",  Msk, max_outputs=2)
    tf.summary.image("3.Density_image", self.D*255.0, max_outputs=2)
    tf.summary.scalar("loss", self.loss)

    self.summary_op = tf.summary.merge_all()

    self.saver = tf.train.Saver()
    self.summary_writer = tf.summary.FileWriter(config.logs_dir, self.sess.graph)

    self.sess.run(tf.global_variables_initializer())
    
    ckpt = tf.train.get_checkpoint_state(config.logs_dir)
    if ckpt and ckpt.model_checkpoint_path:
        self.saver.restore(self.sess, ckpt.model_checkpoint_path)
        print("Model restored...")
Example No. 25
    def german_credit_model():
        x_numeric = tf.constant(numericals.astype(np.float32))
        x_categorical = [tf.one_hot(c, c.max() + 1) for c in categoricals]
        all_x = tf.concat([x_numeric] + x_categorical, 1)
        num_features = int(all_x.shape[1])

        overall_log_scale = ed.Normal(loc=0.,
                                      scale=10.,
                                      name='overall_log_scale')
        beta_log_scales = ed.Normal(loc=overall_log_scale,
                                    scale=tf.ones([num_features]),
                                    name='beta_log_scales')
        beta = ed.Normal(loc=tf.zeros([num_features]),
                         scale=tf.exp(beta_log_scales),
                         name='beta')
        logits = tf.einsum('nd,md->mn', all_x, beta[tf.newaxis, :])
        return ed.Bernoulli(logits=logits, name='y')
Example No. 26
def dense_layer_3d(input_tensor,
                   layer_idx,
                   total_layers,
                   num_attention_heads,
                   size_per_head,
                   initializer,
                   activation,
                   name=None):
    """A dense layer with 3D kernel.

  Args:
    input_tensor: float Tensor of shape [batch, seq_length, hidden_size].
    layer_idx: the index of the current layer.
    total_layers: total number of layers.
    num_attention_heads: Number of attention heads.
    size_per_head: The size per attention head.
    initializer: Kernel initializer.
    activation: Activation function.
    name: The name scope of this layer.

  Returns:
    float logits Tensor.
  """

    last_dim = get_shape_list(input_tensor)[-1]

    with tf.variable_scope(name, dtype=input_tensor.dtype):
        with tf.variable_scope("layer_%d" % layer_idx):
            w = tf.get_variable(
                name="kernel",
                shape=[last_dim, num_attention_heads, size_per_head],
                initializer=initializer)
        b = tf.get_variable(
            name="bias",
            shape=[total_layers, num_attention_heads, size_per_head],
            initializer=tf.zeros_initializer)
        b = tf.gather(b, layer_idx)

        ret = tf.einsum("abc,cde->abde", input_tensor, w)

        ret += b
        if activation is not None:
            return activation(ret)
        else:
            return ret
Example No. 27
def embedding_lookup(x, n_embed, d_embed, initializer, lookup_table=None,
                     use_tpu=True, scope="embedding", reuse=None,
                     dtype=tf.float32):
  """tpu and gpu embedding_lookup function."""
  with tf.variable_scope(scope, reuse=reuse):
    if lookup_table is None:
      lookup_table = tf.get_variable("lookup_table", shape=[n_embed, d_embed],
                                     dtype=dtype, initializer=initializer)

    if use_tpu:
      one_hot_idx = tf.one_hot(x, n_embed, dtype=dtype)
      einsum_prefix = get_einsum_prefix(x.shape.ndims)
      einsum_str = "{0}n,nd->{0}d".format(einsum_prefix)
      output = tf.einsum(einsum_str, one_hot_idx, lookup_table)
    else:
      output = tf.nn.embedding_lookup(lookup_table, x)

    return output, lookup_table
Example No. 28
File: losses.py Project: nnuq/tpu-1
    def __call__(self, logits, labels):
        _, height, width, num_classes = logits.get_shape().as_list()
        # Use bilinear resizing because nearest neighbor is not supported in
        # TensorFlow 1.14 with TPU. Once the environment is updated, it should be
        # changed back to nearest neighbor. For now, it is tested and the
        # performance should be similar.
        if self._use_groundtruth_dimension:
            logits = tf.image.resize_bilinear(logits,
                                              tf.shape(labels)[1:3],
                                              align_corners=False)
        else:
            labels = tf.image.resize_images(
                labels, (height, width), method=tf.image.ResizeMethod.BILINEAR)
        valid_mask = tf.not_equal(labels, self._ignore_label)
        normalizer = tf.reduce_sum(tf.to_float(valid_mask))
        # Assign pixel with ignore label to class 0 (background). The loss on the
        # pixel will later be masked out.
        labels = tf.where(valid_mask, labels, tf.zeros_like(labels))

        labels = tf.squeeze(tf.cast(labels, tf.int32), axis=3)
        valid_mask = tf.squeeze(tf.cast(valid_mask, tf.float32), axis=3)
        onehot_labels = tf.one_hot(labels, num_classes)
        onehot_labels = onehot_labels * (
            1 - self._label_smoothing) + self._label_smoothing / num_classes
        cross_entropy_loss = tf.nn.softmax_cross_entropy_with_logits(
            labels=onehot_labels, logits=logits)

        if not self._class_weights:
            class_weights = [1] * num_classes
        else:
            class_weights = self._class_weights

        if num_classes != len(class_weights):
            raise ValueError(
                'Length of class_weights should be {}'.format(num_classes))

        tf.logging.info('Using class weights: %s', class_weights)
        weight_mask = tf.einsum(
            '...y,y->...', tf.one_hot(labels, num_classes, dtype=tf.float32),
            tf.constant(class_weights, tf.float32))
        valid_mask *= weight_mask
        cross_entropy_loss *= tf.to_float(valid_mask)
        loss = tf.reduce_sum(cross_entropy_loss) / normalizer
        return loss
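
The '...y,y->...' contraction against a one-hot tensor is just a per-pixel lookup of class_weights[label]. A small illustration (a sketch, assuming TF 2.x eager execution):

import tensorflow as tf

labels = tf.constant([[0, 2], [1, 1]])            # [batch, pixels], 3 classes
class_weights = tf.constant([1.0, 2.0, 0.5])

weight_mask = tf.einsum('...y,y->...',
                        tf.one_hot(labels, 3, dtype=tf.float32), class_weights)
print(weight_mask.numpy())    # [[1.  0.5] [2.  2. ]]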
Example No. 29
def rel_seg_bias(q_head,
                 seg_mat,
                 n_head,
                 d_head,
                 initializer,
                 func_mask=None,
                 dtype=tf.float32):
    """Relative attention segmentation bias."""
    # Expand seg_mat: [... x N x T x T]
    tgt_shape = []
    for i in range(seg_mat.shape.ndims):
        tgt_shape.append(tf.shape(seg_mat)[i])
    tgt_shape.insert(-2, n_head)
    seg_mat = tf.expand_dims(seg_mat, -3)

    # Compute same / diff biases
    r_s_bias = tf.get_variable("r_s_bias", [n_head, d_head],
                               dtype=dtype,
                               initializer=initializer)
    seg_embed = tf.get_variable("seg_embed", [2, n_head, d_head],
                                dtype=dtype,
                                initializer=initializer)

    scale = tf.cast(1.0 / np.sqrt(d_head), dtype)
    q_head_s = q_head + r_s_bias * scale
    # [... x N x T x 2]

    seg_biases = tf.einsum("...inh,snh->...nis", q_head_s, seg_embed)
    print((q_head_s).get_shape(), (seg_embed).get_shape(),
          "==rel_seg_bias shape==", seg_biases.get_shape())

    # Split into `diff` & `same`: [... x N x T x 1]
    seg_bias_diff, seg_bias_same = tf.split(seg_biases, 2, axis=-1)

    # Broadcast
    seg_mat = tf.broadcast_to(seg_mat, tgt_shape)
    seg_bias_diff = tf.broadcast_to(seg_bias_diff, tgt_shape)
    seg_bias_same = tf.broadcast_to(seg_bias_same, tgt_shape)
    seg_bias = tf.where(seg_mat, seg_bias_same, seg_bias_diff)

    if func_mask is not None:
        seg_bias *= func_mask

    return seg_bias
Example No. 30
def lm_loss(
    hidden,
    target,
    n_token,
    d_model,
    initializer,
    lookup_table=None,
    tie_weight=False,
    bi_data=True,
    use_tpu=False,
):
    """doc."""

    with tf.variable_scope('lm_loss'):
        if tie_weight:
            assert (lookup_table
                    is not None), 'lookup_table cannot be None for tie_weight'
            softmax_w = lookup_table
        else:
            softmax_w = tf.get_variable(
                'weight',
                [n_token, d_model],
                dtype=hidden.dtype,
                initializer=initializer,
            )

        softmax_b = tf.get_variable(
            'bias',
            [n_token],
            dtype=hidden.dtype,
            initializer=tf.zeros_initializer(),
        )

        logits = tf.einsum('ibd,nd->ibn', hidden, softmax_w) + softmax_b

        if use_tpu:
            one_hot_target = tf.one_hot(target, n_token, dtype=logits.dtype)
            loss = -tf.reduce_sum(
                tf.nn.log_softmax(logits) * one_hot_target, -1)
        else:
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=target, logits=logits)

        return loss
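
The use_tpu branch writes the cross entropy out via one-hot targets and log_softmax; both branches compute the same per-token loss. A small check (a sketch, assuming TF 2.x eager execution):

import tensorflow as tf

logits = tf.random.normal([4, 2, 10])     # [seq, batch, n_token]
target = tf.random.uniform([4, 2], maxval=10, dtype=tf.int32)

one_hot_target = tf.one_hot(target, 10, dtype=logits.dtype)
tpu_loss = -tf.reduce_sum(tf.nn.log_softmax(logits) * one_hot_target, -1)
gpu_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
    labels=target, logits=logits)
print(tf.reduce_max(tf.abs(tpu_loss - gpu_loss)).numpy())   # ~0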