def calculate_reshape(original_shape, new_shape, validate=False, name=None):
  """Calculates the reshaped dimensions (replacing up to one -1 in reshape)."""
  batch_shape_static = tensor_util.constant_value_as_shape(new_shape)
  if batch_shape_static.is_fully_defined():
    return np.int32(batch_shape_static.as_list()), batch_shape_static, []
  with ops.name_scope(name, "calculate_reshape", [original_shape, new_shape]):
    original_size = math_ops.reduce_prod(original_shape)
    implicit_dim = math_ops.equal(new_shape, -1)
    size_implicit_dim = (
        original_size // math_ops.maximum(1, -math_ops.reduce_prod(new_shape)))
    new_ndims = array_ops.shape(new_shape)
    expanded_new_shape = array_ops.where(  # Assumes exactly one `-1`.
        implicit_dim, array_ops.fill(new_ndims, size_implicit_dim), new_shape)
    validations = [] if not validate else [
        check_ops.assert_rank(
            original_shape, 1, message="Original shape must be a vector."),
        check_ops.assert_rank(
            new_shape, 1, message="New shape must be a vector."),
        check_ops.assert_less_equal(
            math_ops.count_nonzero(implicit_dim, dtype=dtypes.int32),
            1,
            message="At most one dimension can be unknown."),
        check_ops.assert_positive(
            expanded_new_shape, message="Shape elements must be >=-1."),
        check_ops.assert_equal(
            math_ops.reduce_prod(expanded_new_shape),
            original_size,
            message="Shape sizes do not match."),
    ]
    return expanded_new_shape, batch_shape_static, validations
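# Illustrative sketch (plain numpy, not from the original source): resolving a
# single `-1` in a reshape spec, mirroring the arithmetic above. When exactly
# one entry is -1, the implicit dimension is original_size divided by the
# product of the known dims, which is what
# `original_size // maximum(1, -reduce_prod(new_shape))` computes.
import numpy as np

def resolve_reshape(original_shape, new_shape):
  original_size = np.prod(original_shape)
  known = -np.prod(new_shape)  # product of the known dims when one entry is -1
  implicit = original_size // max(1, known)
  return [implicit if d == -1 else d for d in new_shape]

assert resolve_reshape([2, 3, 4], [6, -1]) == [6, 4]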
def _MeanGrad(op, grad):
  """Gradient for Mean."""
  sum_grad = _SumGrad(op, grad)[0]
  input_shape = array_ops.shape(op.inputs[0])
  output_shape = array_ops.shape(op.outputs[0])
  factor = _safe_shape_div(math_ops.reduce_prod(input_shape),
                           math_ops.reduce_prod(output_shape))
  return sum_grad / math_ops.cast(factor, sum_grad.dtype), None
def test_docstring_example(self):
  # Produce the first 1000 members of the Halton sequence in 3 dimensions.
  num_results = 1000
  dim = 3
  with self.test_session():
    sample = halton.sample(dim, num_results=num_results, randomized=False)

    # Evaluate the integral of x_1 * x_2^2 * x_3^3 over the three dimensional
    # hypercube.
    powers = math_ops.range(1.0, limit=dim + 1)
    integral = math_ops.reduce_mean(
        math_ops.reduce_prod(sample ** powers, axis=-1))
    true_value = 1.0 / math_ops.reduce_prod(powers + 1.0)

    # Produces a relative absolute error of 1.7%.
    self.assertAllClose(integral.eval(), true_value.eval(), rtol=0.02)

    # Now skip the first 1000 samples and recompute the integral with the next
    # thousand samples. The sequence_indices argument can be used to do this.
    sequence_indices = math_ops.range(start=1000, limit=1000 + num_results,
                                      dtype=dtypes.int32)
    sample_leaped = halton.sample(dim, sequence_indices=sequence_indices,
                                  randomized=False)
    integral_leaped = math_ops.reduce_mean(
        math_ops.reduce_prod(sample_leaped ** powers, axis=-1))
    self.assertAllClose(integral_leaped.eval(), true_value.eval(), rtol=0.05)
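# Illustrative sketch (plain numpy Monte-Carlo, not part of the test above):
# the integral of x_1 * x_2^2 * x_3^3 over the unit cube factorizes into
# 1/2 * 1/3 * 1/4 = 1/24, which is exactly 1 / reduce_prod(powers + 1).
import numpy as np

rng = np.random.RandomState(42)
mc_sample = rng.uniform(size=(100000, 3))
mc_powers = np.arange(1.0, 4.0)
mc_integral = np.mean(np.prod(mc_sample ** mc_powers, axis=-1))
mc_true_value = 1.0 / np.prod(mc_powers + 1.0)  # == 1/24
assert abs(mc_integral - mc_true_value) / mc_true_value < 0.05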
def validate_init_args(distribution,
                       batch_shape,
                       validate_args,
                       batch_shape_static):
  """Helper to __init__ which makes or raises assertions."""
  with ops.name_scope(name="validate_init_args",
                      values=[batch_shape] + distribution._graph_parents):  # pylint: disable=protected-access
    runtime_assertions = []

    if batch_shape.shape.ndims is not None:
      if batch_shape.shape.ndims != 1:
        raise ValueError("`batch_shape` must be a vector "
                         "(saw rank: {}).".format(batch_shape.shape.ndims))
    elif validate_args:
      runtime_assertions += [
          check_ops.assert_rank(
              batch_shape,
              1,
              message="`batch_shape` must be a vector.",
              name="assert_batch_shape_is_vector"),
      ]

    batch_size_static = np.prod(batch_shape_static)
    dist_batch_size_static = (
        None if not distribution.batch_shape.is_fully_defined()
        else np.prod(distribution.batch_shape).value)

    if batch_size_static is not None and dist_batch_size_static is not None:
      if batch_size_static != dist_batch_size_static:
        raise ValueError("`batch_shape` size ({}) must match "
                         "`distribution.batch_shape` size ({}).".format(
                             batch_size_static, dist_batch_size_static))
    elif validate_args:
      runtime_assertions += [
          check_ops.assert_equal(
              math_ops.reduce_prod(batch_shape),
              math_ops.reduce_prod(distribution.batch_shape_tensor()),
              message=("`batch_shape` size must match "
                       "`distributions.batch_shape` size."),
              name="assert_batch_size"),
      ]

    if batch_shape_static is not None:
      if np.any(batch_shape_static < 1):
        raise ValueError("`batch_shape` elements must be positive "
                         "(i.e., larger than zero).")
    elif validate_args:
      runtime_assertions += [
          check_ops.assert_positive(
              batch_shape,
              message=("`batch_shape` elements must be positive "
                       "(i.e., larger than zero)."),
              name="assert_batch_shape_positive")
      ]

    return runtime_assertions
def _MeanGrad(op, grad):
  """Gradient for Mean."""
  sum_grad = _SumGrad(op, grad)[0]
  input_shape = array_ops.shape(op.inputs[0])
  output_shape = array_ops.shape(op.outputs[0])
  # TODO(apassos) remove this device hackery as eager copy to device becomes
  # more seamless.
  with ops.colocate_with(input_shape):
    factor = _safe_shape_div(
        math_ops.reduce_prod(input_shape), math_ops.reduce_prod(output_shape))
  if context.in_eager_mode():
    factor = factor._copy(device_name=sum_grad.device)  # pylint: disable=protected-access
  return sum_grad / math_ops.cast(factor, sum_grad.dtype), None
def _MeanGrad(op, grad):
  """Gradient for Mean."""
  sum_grad = _SumGrad(op, grad)[0]
  input_size = op.inputs[0].get_shape().num_elements()
  output_size = op.outputs[0].get_shape().num_elements()
  if input_size is not None and output_size is not None:
    factor = input_size // max(output_size, 1)
    factor = constant_op.constant(factor, dtype=sum_grad.dtype)
  else:
    input_shape = array_ops.shape(op.inputs[0])
    output_shape = array_ops.shape(op.outputs[0])
    factor = _safe_shape_div(
        math_ops.reduce_prod(input_shape), math_ops.reduce_prod(output_shape))
  return sum_grad / math_ops.cast(factor, sum_grad.dtype), None
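# Illustrative sketch (plain numpy, not from the original source): the
# gradient of a mean is the upstream gradient divided by the number of
# elements averaged per output element, i.e. input_size // output_size,
# matching the static branch above.
import numpy as np

mean_input_shape, mean_axis = (3, 4), 1
mean_input_size = np.prod(mean_input_shape)
mean_output_size = mean_input_size // mean_input_shape[mean_axis]
mean_factor = mean_input_size // max(mean_output_size, 1)
assert mean_factor == mean_input_shape[mean_axis]  # each output averages 4 inputs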
def sample(self, sample_shape=(), seed=None, name="sample"):
  """Generate samples of the specified shape.

  Note that a call to `sample()` without arguments will generate a single
  sample.

  Args:
    sample_shape: Rank 1 `int32` `Tensor`. Shape of the generated samples.
    seed: Python integer seed for RNG
    name: name to give to the op.

  Returns:
    samples: a `Tensor` with prepended dimensions `sample_shape`.
  """
  with ops.name_scope(self.name):
    with ops.name_scope(name, values=[sample_shape]):
      sample_shape = ops.convert_to_tensor(
          sample_shape, dtype=dtypes.int32, name="sample_shape")
      total = math_ops.reduce_prod(sample_shape)
      samples = self.sample_n(total, seed)
      output_shape = array_ops.concat(0, [sample_shape, array_ops.slice(
          array_ops.shape(samples), [1], [-1])])
      output = array_ops.reshape(samples, output_shape, name=name)
      output.set_shape(tensor_util.constant_value_as_shape(
          sample_shape).concatenate(samples.get_shape()[1:]))
  return output
def _entropy(self):
  if (not self.distribution.is_continuous or
      not self.bijector.is_constant_jacobian):
    raise NotImplementedError("entropy is not implemented")
  # Suppose Y = g(X) where g is a diffeomorphism and X is a continuous rv. It
  # can be shown that:
  #   H[Y] = H[X] + E_X[(log o abs o det o J o g)(X)].
  # If is_constant_jacobian then:
  #   E_X[(log o abs o det o J o g)(X)] = (log o abs o det o J o g)(c)
  # where c can be anything.
  entropy = self.distribution.entropy()
  if self._is_maybe_event_override:
    # H[X] = sum_i H[X_i] if X_i are mutually independent.
    # This means that a reduce_sum is a simple rescaling.
    entropy *= math_ops.cast(
        math_ops.reduce_prod(self._override_event_shape),
        dtype=entropy.dtype.base_dtype)
  if self._is_maybe_batch_override:
    new_shape = array_ops.concat([
        _ones_like(self._override_batch_shape),
        self.distribution.batch_shape_tensor()
    ], 0)
    entropy = array_ops.reshape(entropy, new_shape)
    multiples = array_ops.concat([
        self._override_batch_shape,
        _ones_like(self.distribution.batch_shape_tensor())
    ], 0)
    entropy = array_ops.tile(entropy, multiples)
  dummy = array_ops.zeros([], self.dtype)
  entropy -= self.bijector.inverse_log_det_jacobian(dummy)
  entropy.set_shape(self.batch_shape)
  return entropy
def _flip_vector_to_matrix_dynamic(vec, batch_shape):
  """flip_vector_to_matrix with dynamic shapes."""
  # Shapes associated with batch_shape
  batch_rank = array_ops.size(batch_shape)

  # Shapes associated with vec.
  vec = ops.convert_to_tensor(vec, name="vec")
  vec_shape = array_ops.shape(vec)
  vec_rank = array_ops.rank(vec)
  vec_batch_rank = vec_rank - 1

  m = vec_batch_rank - batch_rank
  # vec_shape_left = [M1,...,Mm] or [].
  vec_shape_left = array_ops.slice(vec_shape, [0], [m])
  # If vec_shape_left = [], then condensed_shape = [1] since reduce_prod([]) = 1
  # If vec_shape_left = [M1,...,Mm], condensed_shape = [M1*...*Mm]
  condensed_shape = [math_ops.reduce_prod(vec_shape_left)]
  k = array_ops.gather(vec_shape, vec_rank - 1)
  new_shape = array_ops.concat(0, (batch_shape, [k], condensed_shape))

  def _flip_front_dims_to_back():
    # Permutation corresponding to [N1,...,Nn] + [k, M1,...,Mm]
    perm = array_ops.concat(
        0, (math_ops.range(m, vec_rank), math_ops.range(0, m)))
    return array_ops.transpose(vec, perm=perm)

  x_flipped = control_flow_ops.cond(
      math_ops.less(0, m),
      _flip_front_dims_to_back,
      lambda: array_ops.expand_dims(vec, -1))

  return array_ops.reshape(x_flipped, new_shape)
def _expand_sample_shape_to_vector(self, x, name):
  """Helper to `sample` which ensures input is 1D."""
  x_static_val = tensor_util.constant_value(x)
  if x_static_val is None:
    prod = math_ops.reduce_prod(x)
  else:
    prod = np.prod(x_static_val, dtype=x.dtype.as_numpy_dtype())

  ndims = x.get_shape().ndims  # != sample_ndims
  if ndims is None:
    # Maybe expand_dims.
    ndims = array_ops.rank(x)
    expanded_shape = util.pick_vector(
        math_ops.equal(ndims, 0),
        np.array([1], dtype=np.int32), array_ops.shape(x))
    x = array_ops.reshape(x, expanded_shape)
  elif ndims == 0:
    # Definitely expand_dims.
    if x_static_val is not None:
      x = ops.convert_to_tensor(
          np.array([x_static_val], dtype=x.dtype.as_numpy_dtype()),
          name=name)
    else:
      x = array_ops.reshape(x, [1])
  elif ndims != 1:
    raise ValueError("Input is neither scalar nor vector.")

  return x, prod
def testDegenerate(self):
  with self.test_session(use_gpu=True):
    for dtype in (dtypes.float16, dtypes.float32, dtypes.float64):
      # A large number is needed to get Eigen to die
      x = array_ops.zeros((0, 9938), dtype=dtype)
      y = math_ops.reduce_prod(x, [0])
      self.assertAllEqual(y.eval(), np.ones(9938))
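# Illustrative sketch (plain numpy): the product over an empty reduction axis
# is the empty product, i.e. 1, which is why reducing a (0, 9938) tensor of
# zeros over axis 0 yields a vector of ones rather than zeros.
import numpy as np

empty_x = np.zeros((0, 9938))
assert np.array_equal(np.prod(empty_x, axis=0), np.ones(9938))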
def test_tensor_array_grad(self):
  inp = constant_op.constant(np.random.rand(3, 4, 2), dtype=dtypes.float32)
  ta = tensor_array_ops.TensorArray(dtypes.float32, size=3)
  ta = ta.unstack(inp)

  def loop_fn(i):

    def body(j, x):
      value = ta.gather([j])
      value = array_ops.gather(array_ops.reshape(value, [4, 2]), i)
      return j + 1, x + value

    _, out = control_flow_ops.while_loop(lambda j, _: j < 3, body,
                                         (0, array_ops.zeros([2])))
    out = math_ops.reduce_prod(out)
    return out, gradient_ops.gradients(out, inp)[0]

  pfor_out, pfor_out_grad = pfor_control_flow_ops.pfor(loop_fn, 4)
  # Note that tf.while_loop does not work in the setup above. So we manually
  # construct the equivalent computation of the above loops here.
  real_out = math_ops.reduce_sum(inp, axis=[0])
  real_out = math_ops.reduce_prod(real_out, axis=[1])
  # Note that gradients of real_out will accumulate the gradients across the
  # output value. Hence we do the same aggregation on pfor_out_grad.
  real_out_grad = gradient_ops.gradients(real_out, inp)[0]
  sum_pfor_out_grad = math_ops.reduce_sum(pfor_out_grad, axis=[0])

  with session.Session() as sess:
    v1, v2, v1_grad, v2_grad = sess.run(
        [pfor_out, real_out, sum_pfor_out_grad, real_out_grad])
    self.assertAllClose(v1, v2)
    self.assertAllClose(v1_grad, v2_grad)
def _unblockify_then_matricize(self, vec):
  """Flatten the block dimensions then reshape to a batch matrix."""
  # Suppose
  #   vec.shape = [v0, v1, v2, v3],
  #   self.block_depth = 2.
  # Then
  #   leading shape = [v0, v1]
  #   block shape = [v2, v3].
  # We will reshape vec to
  #   [v1, v2*v3, v0].

  # Un-blockify: Flatten block dimensions. Reshape
  #   [v0, v1, v2, v3] --> [v0, v1, v2*v3].
  if vec.get_shape().is_fully_defined():
    # vec_shape = [v0, v1, v2, v3]
    vec_shape = vec.get_shape().as_list()
    # vec_leading_shape = [v0, v1]
    vec_leading_shape = vec_shape[:-self.block_depth]
    # vec_block_shape = [v2, v3]
    vec_block_shape = vec_shape[-self.block_depth:]
    # flat_shape = [v0, v1, v2*v3]
    flat_shape = vec_leading_shape + [np.prod(vec_block_shape)]
  else:
    vec_shape = array_ops.shape(vec)
    vec_leading_shape = vec_shape[:-self.block_depth]
    vec_block_shape = vec_shape[-self.block_depth:]
    flat_shape = array_ops.concat(
        (vec_leading_shape, [math_ops.reduce_prod(vec_block_shape)]), 0)
  vec_flat = array_ops.reshape(vec, flat_shape)

  # Matricize: Reshape to batch matrix.
  #   [v0, v1, v2*v3] --> [v1, v2*v3, v0],
  # representing a shape [v1] batch of [v2*v3, v0] matrices.
  matrix = distribution_util.rotate_transpose(vec_flat, shift=-1)
  return matrix
def sequences_loss(logits, targets, weights, num_decoders,
                   average_across_timesteps=True, average_across_batch=True,
                   softmax_loss_function=None, name=None):
  """Product of weighted cross-entropy losses for sequences of logits, batch-collapsed.

  Args:
    logits: Lists of 2D Tensors of shape [batch_size x num_decoder_symbols],
      of size num_decoders.
    targets: Lists of 1D batch-sized int32 Tensors of the same lengths as
      logits.
    weights: List of 1D batch-sized float-Tensors of the same length as logits.
    num_decoders: Python int. The number of decoders; logits and targets must
      both have this length.
    average_across_timesteps: If set, divide the returned cost by the total
      label weight.
    average_across_batch: If set, divide the returned cost by the batch size.
    softmax_loss_function: Function (inputs-batch, labels-batch) -> loss-batch
      to be used instead of the standard softmax (the default if this is None).
    name: Optional name for this operation, defaults to "sequence_loss".

  Returns:
    A scalar float Tensor: The product of average log-perplexities per symbol
    (weighted).

  Raises:
    ValueError: If len(logits) is different from len(targets) or len(weights).
  """
  if len(targets) != len(logits) or num_decoders != len(logits):
    raise ValueError("Lengths of logits and targets must be %d, not "
                     "%d, %d." % (num_decoders, len(logits), len(targets)))
  losses = []
  for i in xrange(num_decoders):
    losses.append(tf.nn.seq2seq.sequence_loss(
        logits[i], targets[i], weights[i], average_across_timesteps,
        average_across_batch, softmax_loss_function, name))
  return math_ops.reduce_prod(losses)
def _determinant_from_sigma_chol(sigma_chol):
  det_last_dim = array_ops.rank(sigma_chol) - 2
  sigma_batch_diag = array_ops.batch_matrix_diag_part(sigma_chol)
  det = math_ops.square(math_ops.reduce_prod(
      sigma_batch_diag, reduction_indices=det_last_dim))
  det.set_shape(sigma_chol.get_shape()[:-2])
  return det
def _sample_n(self, n, seed=None):
  # Get ids as a [n, batch_size]-shaped matrix, unless batch_shape=[] then get
  # ids as a [n]-shaped vector.
  batch_size = (np.prod(self.batch_shape.as_list(), dtype=np.int32)
                if self.batch_shape.is_fully_defined()
                else math_ops.reduce_prod(self.batch_shape_tensor()))
  ids = self._mixture_distribution.sample(
      sample_shape=concat_vectors(
          [n],
          distribution_util.pick_vector(
              self.is_scalar_batch(),
              np.int32([]),
              [batch_size])),
      seed=distribution_util.gen_new_seed(
          seed, "poisson_lognormal_quadrature_compound"))
  # Stride `quadrature_size` for `batch_size` number of times.
  offset = math_ops.range(start=0,
                          limit=batch_size * self._quadrature_size,
                          delta=self._quadrature_size,
                          dtype=ids.dtype)
  ids += offset
  rate = array_ops.gather(
      array_ops.reshape(self.distribution.rate, shape=[-1]), ids)
  rate = array_ops.reshape(
      rate, shape=concat_vectors([n], self.batch_shape_tensor()))
  return random_ops.random_poisson(
      lam=rate, shape=[], dtype=self.dtype, seed=seed)
def _prob(self, y):
  x, ildj = self.bijector.inverse_and_inverse_log_det_jacobian(y)
  x = self._maybe_rotate_dims(x, rotate_right=True)
  prob = self.distribution.prob(x)
  if self._is_maybe_event_override:
    prob = math_ops.reduce_prod(prob, self._reduce_event_indices)
  return math_ops.exp(ildj) * prob
def embedding_lookup_unique(params, ids, name=None):
  """Version of embedding_lookup that avoids duplicate lookups.

  This can save communication in the case of repeated ids.
  Same interface as embedding_lookup. Except it supports multi-dimensional
  `ids` which allows to not reshape input/output to fit gather.

  Args:
    params: A list of tensors with the same shape and type, or a
      `PartitionedVariable`. Shape `[index, d1, d2, ...]`.
    ids: A `Tensor` with type `int32` or `int64` containing the ids to be
      looked up in `params`. Shape `[ids1, ids2, ...]`.
    name: A name for this operation (optional).

  Returns:
    A `Tensor` with the same type as the tensors in `params` and dimension of
    `[ids1, ids2, d1, d2, ...]`.

  Raises:
    ValueError: If `params` is empty.
  """
  with ops.name_scope(name, "EmbeddingLookupUnique", [params, ids]):
    ids = ops.convert_to_tensor(ids)
    shape = array_ops.shape(ids)
    ids_flat = array_ops.reshape(
        ids, math_ops.reduce_prod(shape, keep_dims=True))
    unique_ids, idx = array_ops.unique(ids_flat)
    unique_embeddings = embedding_ops.embedding_lookup(params, unique_ids)
    embeds_flat = array_ops.gather(unique_embeddings, idx)
    embed_shape = array_ops.concat(
        [shape, array_ops.shape(unique_embeddings)[1:]], 0)
    embeds = array_ops.reshape(embeds_flat, embed_shape)
    embeds.set_shape(ids.get_shape().concatenate(
        unique_embeddings.get_shape()[1:]))
    return embeds
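# Illustrative sketch (plain numpy, hypothetical toy data): the dedup trick
# above in miniature -- look up each unique id once, then gather the results
# back out with the inverse index, which matches a direct per-id lookup.
import numpy as np

toy_params = np.arange(20.0).reshape(10, 2)   # [index, d1]
toy_ids = np.array([[3, 3], [7, 3]])
uniq, inv = np.unique(toy_ids.ravel(), return_inverse=True)
deduped = toy_params[uniq][inv].reshape(toy_ids.shape + (2,))
assert np.array_equal(deduped, toy_params[toy_ids])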
def embedding_lookup(params, ids, name='embedding_lookup'):
  """Provides a N dimensional version of tf.embedding_lookup.

  Ids are flattened to a 1d tensor before being passed to embedding_lookup;
  then, they are unflattened to match the original ids shape plus an extra
  trailing dimension of the size of the embeddings.

  Args:
    params: List of tensors of size D0 x D1 x ... x Dn-2 x Dn-1.
    ids: N-dimensional tensor of B0 x B1 x .. x Bn-2 x Bn-1. Must contain
      indexes into params.
    name: Optional name for the op.

  Returns:
    A tensor of size B0 x B1 x .. x Bn-2 x Bn-1 x D1 x ... x Dn-2 x Dn-1
    containing the values from the params tensor(s) for indices in ids.

  Raises:
    ValueError: if some parameters are invalid.
  """
  with ops.name_scope(name, 'embedding_lookup', [params, ids]):
    params = ops.convert_to_tensor(params)
    ids = ops.convert_to_tensor(ids)
    shape = array_ops_.shape(ids)
    ids_flat = array_ops_.reshape(
        ids, math_ops.reduce_prod(shape, keep_dims=True))
    embeds_flat = nn.embedding_lookup(params, ids_flat, name)
    embed_shape = array_ops_.concat_v2([shape, [-1]], 0)
    embeds = array_ops_.reshape(embeds_flat, embed_shape)
    embeds.set_shape(ids.get_shape().concatenate(params.get_shape()[1:]))
    return embeds
def _expand_sample_shape(self, sample_shape):
  """Helper to `sample` which ensures sample_shape is 1D."""
  sample_shape_static_val = tensor_util.constant_value(sample_shape)
  ndims = sample_shape.get_shape().ndims
  if sample_shape_static_val is None:
    if ndims is None or not sample_shape.get_shape().is_fully_defined():
      ndims = array_ops.rank(sample_shape)
    expanded_shape = distribution_util.pick_vector(
        math_ops.equal(ndims, 0),
        np.array((1,), dtype=dtypes.int32.as_numpy_dtype()),
        array_ops.shape(sample_shape))
    sample_shape = array_ops.reshape(sample_shape, expanded_shape)
    total = math_ops.reduce_prod(sample_shape)  # reduce_prod([]) == 1
  else:
    if ndims is None:
      raise ValueError(
          "Shouldn't be here; ndims cannot be none when we have a "
          "tf.constant shape.")
    if ndims == 0:
      sample_shape_static_val = np.reshape(sample_shape_static_val, [1])
      sample_shape = ops.convert_to_tensor(
          sample_shape_static_val,
          dtype=dtypes.int32,
          name="sample_shape")
    total = np.prod(sample_shape_static_val,
                    dtype=dtypes.int32.as_numpy_dtype())
  return sample_shape, total
def _TopKGrad(op, grad, _):
  """Return the gradients for TopK.

  Args:
    op: The TopKOp for which we need to generate gradients.
    grad: Tensor. The gradients passed to the TopKOp.

  Returns:
    A list of two tensors, the first being the gradient w.r.t. the input and
    TopK, and the second being the gradient w.r.t. the indices (all zero).
  """
  in_shape = array_ops.shape(op.inputs[0])
  ind_shape = array_ops.shape(op.outputs[1])

  ind_lastdim = array_ops.gather(ind_shape, array_ops.size(ind_shape) - 1)
  # Flatten indices to 2D.
  ind_2d = array_ops.reshape(op.outputs[1], array_ops.stack([-1, ind_lastdim]))

  in_lastdim = array_ops.gather(in_shape, array_ops.size(in_shape) - 1)
  outerdim = array_ops.shape(ind_2d)[0]
  # Compute linear indices (flattened to 1D).
  ind = array_ops.reshape(ind_2d + array_ops.expand_dims(
      math_ops.range(0, outerdim * in_lastdim, in_lastdim), -1), [-1])

  # Substitute grad to appropriate locations and fill the rest with zeros,
  # finally reshaping it to the original input shape.
  return [array_ops.reshape(
      sparse_ops.sparse_to_dense(ind,
                                 array_ops.reshape(
                                     math_ops.reduce_prod(in_shape), [1]),
                                 array_ops.reshape(grad, [-1]),
                                 validate_indices=False),
      in_shape),
          array_ops.zeros([], dtype=dtypes.int32)]
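# Illustrative sketch (plain numpy, not the TF kernel): scattering the
# incoming gradient back to the top-k positions and leaving zeros elsewhere,
# using the same flattened linear-index construction as above.
import numpy as np

topk_x = np.array([[1., 5., 3.], [4., 0., 2.]])
k = 2
topk_ind = np.argsort(-topk_x, axis=1)[:, :k]        # top-k indices per row
topk_grad = np.ones((2, k))
lastdim = topk_x.shape[-1]
lin = (topk_ind + np.arange(2)[:, None] * lastdim).ravel()
scattered = np.zeros(topk_x.size)
scattered[lin] = topk_grad.ravel()
scattered = scattered.reshape(topk_x.shape)
assert np.array_equal(scattered, np.array([[0., 1., 1.], [1., 0., 1.]]))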
def per_step_batch_loss(self, features, mode, state):
  """Computes predictions, losses, and intermediate model states.

  Args:
    features: A dictionary with times, values, and (optionally) exogenous
        regressors. See `define_loss`.
    mode: The tf.estimator.ModeKeys mode to use (TRAIN, EVAL, INFER).
    state: Model-dependent state, each with size [batch size x ...]. The
        number and type will typically be fixed by the model (for example a
        mean and variance).

  Returns:
    A tuple of (loss, filtered_states, predictions)
      loss: Average loss values across the batch.
      filtered_states: For each Tensor in `state` with shape [batch size x
          ...], `filtered_states` has a Tensor with shape [batch size x window
          size x ...] with filtered state for each part of the batch and
          window.
      predictions: A dictionary with model-dependent one-step-ahead (or
          at-least-one-step-ahead with missing values) predictions, with keys
          indicating the type of prediction and values having shape [batch
          size x window size x ...]. For example state space models provide
          "mean", "covariance", and "log_likelihood".
  """
  self._check_graph_initialized()
  times = math_ops.cast(features[TrainEvalFeatures.TIMES], dtype=dtypes.int64)
  values = math_ops.cast(features[TrainEvalFeatures.VALUES], dtype=self.dtype)
  exogenous_regressors = self._process_exogenous_features(
      times=times,
      features={key: value for key, value in features.items()
                if key not in [TrainEvalFeatures.TIMES,
                               TrainEvalFeatures.VALUES]})

  def _batch_loss_filtering_step(step_number, current_times, state):
    """Make a prediction and update it based on data."""
    current_values = values[:, step_number, :]
    state = self._apply_exogenous_update(
        step_number=step_number, current_times=current_times, state=state,
        raw_features=features,
        embedded_exogenous_regressors=exogenous_regressors)
    predicted_state, predictions = self._prediction_step(
        current_times=current_times,
        state=state)
    filtered_state, outputs = self._filtering_step(
        current_times=current_times,
        current_values=current_values,
        state=predicted_state,
        predictions=predictions)
    return filtered_state, outputs

  state, outputs = self._state_update_loop(
      times=times, state=state, state_update_fn=_batch_loss_filtering_step,
      outputs=["loss"] + self._train_output_names)
  outputs["loss"].set_shape(times.get_shape())
  loss_sum = math_ops.reduce_sum(outputs["loss"])
  per_observation_loss = (loss_sum / math_ops.cast(
      math_ops.reduce_prod(array_ops.shape(times)), dtype=self.dtype))
  per_observation_loss += self._loss_additions(times, values, mode)
  # Since we have window-level additions to the loss, its per-step value is
  # misleading, so we avoid returning it.
  del outputs["loss"]
  return per_observation_loss, state, outputs
def _finish_prob_for_one_fiber(self, y, x, ildj, distribution_kwargs):
  """Finish computation of prob on one element of the inverse image."""
  x = self._maybe_rotate_dims(x, rotate_right=True)
  prob = self.distribution.prob(x, **distribution_kwargs)
  if self._is_maybe_event_override:
    prob = math_ops.reduce_prod(prob, self._reduce_event_indices)
  return math_ops.exp(math_ops.cast(ildj, prob.dtype)) * prob
def _shape_tensor(self):
  # See self.shape for explanation of steps
  s_shape = array_ops.shape(self._spectrum)
  batch_shape = s_shape[:-self.block_depth]
  trailing_dims = s_shape[-self.block_depth:]
  n = math_ops.reduce_prod(trailing_dims)
  n_x_n = [n, n]
  return array_ops.concat((batch_shape, n_x_n), 0)
def _MeanGrad(op, grad):
  """Gradient for Mean."""
  sum_grad = _SumGrad(op, grad)[0]
  input_shape = op.inputs[0]._shape_tuple()  # pylint: disable=protected-access
  output_shape = op.outputs[0]._shape_tuple()  # pylint: disable=protected-access
  if (input_shape is not None and output_shape is not None and
      None not in input_shape and None not in output_shape):
    input_size = np.prod(input_shape)
    output_size = np.prod(output_shape)
    factor = input_size // max(output_size, 1)
    factor = constant_op.constant(factor, dtype=sum_grad.dtype)
  else:
    input_shape = array_ops.shape(op.inputs[0])
    output_shape = array_ops.shape(op.outputs[0])
    factor = _safe_shape_div(
        math_ops.reduce_prod(input_shape), math_ops.reduce_prod(output_shape))
  return math_ops.truediv(sum_grad, math_ops.cast(factor,
                                                  sum_grad.dtype)), None
def run_test_sample_consistent_log_prob(
    self, sess_run_fn, dist,
    num_samples=int(1e5), num_threshold=int(1e3), seed=42,
    rtol=1e-2, atol=0.):
  """Tests that sample/log_prob are consistent with each other.

  "Consistency" means that `sample` and `log_prob` correspond to the same
  distribution.

  Note: this test only verifies a necessary condition for consistency--it does
  not verify sufficiency, hence it does not prove `sample`, `log_prob` truly
  are consistent.

  Args:
    sess_run_fn: Python `callable` taking `list`-like of `Tensor`s and
      returning a list of results after running one "step" of TensorFlow
      computation, typically set to `sess.run`.
    dist: Distribution instance or object which implements `sample`,
      `log_prob`, `event_shape_tensor` and `batch_shape_tensor`.
    num_samples: Python `int` scalar indicating the number of Monte-Carlo
      samples to draw from `dist`.
    num_threshold: Python `int` scalar indicating the number of samples a
      bucket must contain before being compared to the probability.
      Default value: 1e3; must be at least 1.
      Warning: setting this too high will cause the test to falsely pass,
      while setting it too low will cause the test to falsely fail.
    seed: Python `int` indicating the seed to use when sampling from `dist`.
      In general it is not recommended to use `None` during a test as this
      increases the likelihood of spurious test failure.
    rtol: Python `float`-type indicating the admissible relative error between
      analytical and sample statistics.
    atol: Python `float`-type indicating the admissible absolute error between
      analytical and sample statistics.

  Raises:
    ValueError: if `num_threshold < 1`.
  """
  if num_threshold < 1:
    raise ValueError("num_threshold({}) must be at least 1.".format(
        num_threshold))
  # Histogram only supports vectors so we call it once per batch coordinate.
  y = dist.sample(num_samples, seed=seed)
  y = array_ops.reshape(y, shape=[num_samples, -1])
  batch_size = math_ops.reduce_prod(dist.batch_shape_tensor())
  batch_dims = array_ops.shape(dist.batch_shape_tensor())[0]
  edges_expanded_shape = 1 + array_ops.pad([-2], paddings=[[0, batch_dims]])
  for b, x in enumerate(array_ops.unstack(y, axis=1)):
    counts, edges = self.histogram(x)
    edges = array_ops.reshape(edges, edges_expanded_shape)
    probs = math_ops.exp(dist.log_prob(edges))
    probs = array_ops.reshape(probs, shape=[-1, batch_size])[:, b]

    [counts_, probs_] = sess_run_fn([counts, probs])
    valid = counts_ > num_threshold
    probs_ = probs_[valid]
    counts_ = counts_[valid]
    self.assertAllClose(probs_, counts_ / num_samples,
                        rtol=rtol, atol=atol)
def _ProdGrad(op, grad):
  """Gradient for Prod."""
  # The gradient can be expressed by dividing the product by each entry of the
  # input tensor, but this approach can't deal with zeros in the input.
  # Here, we avoid this problem by composing the output as a product of two
  # cumprod operations.

  input_shape = array_ops.shape(op.inputs[0])
  # Reshape reduction indices for the case where the parameter is a scalar
  reduction_indices = array_ops.reshape(op.inputs[1], [-1])

  # Expand grad to full input shape
  output_shape_kept_dims = math_ops.reduced_shape(input_shape, op.inputs[1])
  tile_scaling = _safe_shape_div(input_shape, output_shape_kept_dims)
  grad = array_ops.reshape(grad, output_shape_kept_dims)
  grad = array_ops.tile(grad, tile_scaling)

  # Pack all reduced dimensions into a single one, so we can perform the
  # cumprod ops. If the reduction dims list is empty, it defaults to float32,
  # so we need to cast here. We put all the shape-related ops on CPU to avoid
  # copying back and forth, and since listdiff is CPU only.
  with ops.device("/cpu:0"):
    rank = array_ops.rank(op.inputs[0])
    reduction_indices = (reduction_indices + rank) % rank
    reduced = math_ops.cast(reduction_indices, dtypes.int32)
    idx = math_ops.range(0, rank)
    other, _ = array_ops.setdiff1d(idx, reduced)
    perm = array_ops.concat([reduced, other], 0)
    reduced_num = math_ops.reduce_prod(array_ops.gather(input_shape, reduced))
    other_num = math_ops.reduce_prod(array_ops.gather(input_shape, other))
  permuted = array_ops.transpose(op.inputs[0], perm)
  permuted_shape = array_ops.shape(permuted)
  reshaped = array_ops.reshape(permuted, (reduced_num, other_num))

  # Calculate product, leaving out the current entry
  left = math_ops.cumprod(reshaped, axis=0, exclusive=True)
  right = math_ops.cumprod(reshaped, axis=0, exclusive=True, reverse=True)
  # For complex inputs, the gradient is in the conjugate direction.
  y = array_ops.reshape(math_ops.conj(left) * math_ops.conj(right),
                        permuted_shape)

  # Invert the transpose and reshape operations.
  # Make sure to set the statically known shape information through a reshape.
  out = grad * array_ops.transpose(y, array_ops.invert_permutation(perm))
  return array_ops.reshape(out, input_shape), None
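# Illustrative sketch (plain numpy): d(prod x)/dx_i is the product of all the
# other entries, computed zero-safely as exclusive-cumprod-from-the-left times
# exclusive-cumprod-from-the-right, the same decomposition the kernel above
# uses.
import numpy as np

prod_x = np.array([2., 0., 5.])
left = np.concatenate([[1.], np.cumprod(prod_x[:-1])])            # exclusive
right = np.concatenate([np.cumprod(prod_x[::-1])[-2::-1], [1.]])  # reversed exclusive
grad_prod = left * right
assert np.array_equal(grad_prod, np.array([0., 10., 0.]))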
def _prob(self, y, bijector_kwargs=None, distribution_kwargs=None):
  bijector_kwargs = bijector_kwargs or {}
  distribution_kwargs = distribution_kwargs or {}
  x, ildj = self.bijector.inverse_and_inverse_log_det_jacobian(
      y, **bijector_kwargs)
  prob = self.distribution.prob(x, **distribution_kwargs)
  if self._override_event_shape is not None:
    prob = math_ops.reduce_prod(prob, self._reduce_event_indices)
  return math_ops.exp(ildj) * prob
def testProdGradientForNegativeAxis(self):
  inputs = constant_op.constant([[1., 2.], [3., 4.]],
                                dtype=dtypes.float32)
  outputs = math_ops.reduce_prod(inputs, -1)
  with self.cached_session():
    error = gradient_checker.compute_gradient_error(
        inputs, inputs.get_shape().as_list(),
        outputs, outputs.get_shape().as_list())
    self.assertLess(error, 1e-4)
def _MeanGrad(op, grad):
  """Gradient for Mean."""
  sum_grad = _SumGrad(op, grad)[0]
  input_shape = array_ops.shape(op.inputs[0])
  output_shape = array_ops.shape(op.outputs[0])
  # TODO(apassos) remove this device hackery as eager copy to device becomes
  # more seamless.
  with ops.colocate_with(input_shape):
    factor = _safe_shape_div(
        math_ops.reduce_prod(input_shape), math_ops.reduce_prod(output_shape))
  if context.in_eager_mode():
    # Note that we go through numpy here just so we use the eager per-device
    # scalar cache. We know the factor is a host memory tensor because it's a
    # shape, and we also know that converting a scalar into a tensor triggers
    # a per-device cache.
    factor = factor.numpy()
    factor = constant_op.constant(factor, dtype=sum_grad.dtype)
  return sum_grad / math_ops.cast(factor, sum_grad.dtype), None
def lu_solve(lower_upper, perm, rhs, validate_args=False, name=None):
  """Solves systems of linear eqns `A X = RHS`, given LU factorizations.

  Note: this function does not verify the implied matrix is actually
  invertible nor is this condition checked even when `validate_args=True`.

  Args:
    lower_upper: `lu` as returned by `tf.linalg.lu`, i.e., if
      `matmul(P, matmul(L, U)) = X` then `lower_upper = L + U - eye`.
    perm: `p` as returned by `tf.linalg.lu`, i.e., if
      `matmul(P, matmul(L, U)) = X` then `perm = argmax(P)`.
    rhs: Matrix-shaped float `Tensor` representing targets for which to solve;
      `A X = RHS`. To handle vector cases, use:
      `lu_solve(..., rhs[..., tf.newaxis])[..., 0]`.
    validate_args: Python `bool` indicating whether arguments should be checked
      for correctness. Note: this function does not verify the implied matrix
      is actually invertible, even when `validate_args=True`.
      Default value: `False` (i.e., don't validate arguments).
    name: Python `str` name given to ops managed by this object.
      Default value: `None` (i.e., 'lu_solve').

  Returns:
    x: The `X` in `A @ X = RHS`.

  #### Examples

  ```python
  import numpy as np
  import tensorflow as tf
  import tensorflow_probability as tfp

  x = [[[1., 2],
        [3, 4]],
       [[7, 8],
        [3, 4]]]
  inv_x = tf.linalg.lu_solve(*tf.linalg.lu(x), rhs=tf.eye(2))
  tf.assert_near(tf.matrix_inverse(x), inv_x)
  # ==> True
  ```
  """
  with ops.name_scope(name or 'lu_solve'):
    lower_upper = ops.convert_to_tensor(
        lower_upper, dtype_hint=dtypes.float32, name='lower_upper')
    perm = ops.convert_to_tensor(perm, dtype_hint=dtypes.int32, name='perm')
    rhs = ops.convert_to_tensor(rhs, dtype_hint=lower_upper.dtype, name='rhs')

    assertions = _lu_solve_assertions(lower_upper, perm, rhs, validate_args)
    if assertions:
      with ops.control_dependencies(assertions):
        lower_upper = array_ops.identity(lower_upper)
        perm = array_ops.identity(perm)
        rhs = array_ops.identity(rhs)

    if (rhs.shape.rank == 2 and perm.shape.rank == 1):
      # Both rhs and perm have scalar batch_shape.
      permuted_rhs = array_ops.gather(rhs, perm, axis=-2)
    else:
      # Either rhs or perm have non-scalar batch_shape or we can't determine
      # this information statically.
      rhs_shape = array_ops.shape(rhs)
      broadcast_batch_shape = array_ops.broadcast_dynamic_shape(
          rhs_shape[:-2],
          array_ops.shape(perm)[:-1])
      d, m = rhs_shape[-2], rhs_shape[-1]
      rhs_broadcast_shape = array_ops.concat(
          [broadcast_batch_shape, [d, m]], axis=0)

      # Tile out rhs.
      broadcast_rhs = array_ops.broadcast_to(rhs, rhs_broadcast_shape)
      broadcast_rhs = array_ops.reshape(broadcast_rhs, [-1, d, m])

      # Tile out perm and add batch indices.
      broadcast_perm = array_ops.broadcast_to(perm, rhs_broadcast_shape[:-1])
      broadcast_perm = array_ops.reshape(broadcast_perm, [-1, d])
      broadcast_batch_size = math_ops.reduce_prod(broadcast_batch_shape)
      broadcast_batch_indices = array_ops.broadcast_to(
          math_ops.range(broadcast_batch_size)[:, array_ops.newaxis],
          [broadcast_batch_size, d])
      broadcast_perm = array_ops.stack(
          [broadcast_batch_indices, broadcast_perm], axis=-1)

      permuted_rhs = array_ops.gather_nd(broadcast_rhs, broadcast_perm)
      permuted_rhs = array_ops.reshape(permuted_rhs, rhs_broadcast_shape)

    lower = set_diag(
        band_part(lower_upper, num_lower=-1, num_upper=0),
        array_ops.ones(
            array_ops.shape(lower_upper)[:-1], dtype=lower_upper.dtype))
    return triangular_solve(
        lower_upper,  # Only upper is accessed.
        triangular_solve(lower, permuted_rhs),
        lower=False)
def _beam_search_step(time, logits, next_cell_state, beam_state, batch_size,
                      beam_width, end_token, length_penalty_weight):
  """Performs a single step of Beam Search Decoding.

  Args:
    time: Beam search time step, should start at 0. At time 0 we assume
      that all beams are equal and consider only the first beam for
      continuations.
    logits: Logits at the current time step. A tensor of shape
      `[batch_size, beam_width, vocab_size]`
    next_cell_state: The next state from the cell, e.g. an instance of
      AttentionWrapperState if the cell is attentional.
    beam_state: Current state of the beam search.
      An instance of `BeamSearchDecoderState`.
    batch_size: The batch size for this input.
    beam_width: Python int. The size of the beams.
    end_token: The int32 end token.
    length_penalty_weight: Float weight to penalize length. Disabled with 0.0.

  Returns:
    A new beam state.
  """
  static_batch_size = tensor_util.constant_value(batch_size)

  # Calculate the current lengths of the predictions
  prediction_lengths = beam_state.lengths
  previously_finished = beam_state.finished

  # Calculate the total log probs for the new hypotheses
  # Final Shape: [batch_size, beam_width, vocab_size]
  step_log_probs = nn_ops.log_softmax(logits)
  step_log_probs = _mask_probs(step_log_probs, end_token, previously_finished)
  total_probs = array_ops.expand_dims(beam_state.log_probs, 2) + step_log_probs

  # Calculate the continuation lengths by adding to all continuing beams.
  vocab_size = logits.shape[-1].value or array_ops.shape(logits)[-1]
  lengths_to_add = array_ops.one_hot(
      indices=array_ops.fill([batch_size, beam_width], end_token),
      depth=vocab_size,
      on_value=np.int64(0),
      off_value=np.int64(1),
      dtype=dtypes.int64)
  add_mask = math_ops.to_int64(math_ops.logical_not(previously_finished))
  lengths_to_add *= array_ops.expand_dims(add_mask, 2)
  new_prediction_lengths = (
      lengths_to_add + array_ops.expand_dims(prediction_lengths, 2))

  # Calculate the scores for each beam
  scores = _get_scores(
      log_probs=total_probs,
      sequence_lengths=new_prediction_lengths,
      length_penalty_weight=length_penalty_weight)

  time = ops.convert_to_tensor(time, name="time")
  # During the first time step we only consider the initial beam
  scores_shape = array_ops.shape(scores)
  scores_flat = control_flow_ops.cond(
      time > 0,
      lambda: array_ops.reshape(scores, [batch_size, -1]),
      lambda: scores[:, 0])
  num_available_beam = control_flow_ops.cond(
      time > 0, lambda: math_ops.reduce_prod(scores_shape[1:]),
      lambda: math_ops.reduce_prod(scores_shape[2:]))

  # Pick the next beams according to the specified successors function
  next_beam_size = math_ops.minimum(
      ops.convert_to_tensor(beam_width, dtype=dtypes.int32, name="beam_width"),
      num_available_beam)
  next_beam_scores, word_indices = nn_ops.top_k(scores_flat, k=next_beam_size)
  next_beam_scores.set_shape([static_batch_size, beam_width])
  word_indices.set_shape([static_batch_size, beam_width])

  # Pick out the probs, beam_ids, and states according to the chosen
  # predictions
  next_beam_probs = _tensor_gather_helper(
      gather_indices=word_indices,
      gather_from=total_probs,
      batch_size=batch_size,
      range_size=beam_width * vocab_size,
      gather_shape=[-1],
      name="next_beam_probs")
  # Note: just doing the following
  #   math_ops.to_int32(word_indices % vocab_size,
  #       name="next_beam_word_ids")
  # would be a lot cleaner but for reasons unclear, that hides the results of
  # the op which prevents capturing it with tfdbg debug ops.
  raw_next_word_ids = math_ops.mod(word_indices, vocab_size,
                                   name="next_beam_word_ids")
  next_word_ids = math_ops.to_int32(raw_next_word_ids)
  next_beam_ids = math_ops.to_int32(word_indices / vocab_size,
                                    name="next_beam_parent_ids")

  # Append new ids to current predictions
  previously_finished = _tensor_gather_helper(
      gather_indices=next_beam_ids,
      gather_from=previously_finished,
      batch_size=batch_size,
      range_size=beam_width,
      gather_shape=[-1])
  next_finished = math_ops.logical_or(previously_finished,
                                      math_ops.equal(next_word_ids, end_token),
                                      name="next_beam_finished")

  # Calculate the length of the next predictions.
  # 1. Finished beams remain unchanged
  # 2. Beams that are now finished (EOS predicted) remain unchanged
  # 3. Beams that are not yet finished have their length increased by 1
  lengths_to_add = math_ops.to_int64(math_ops.logical_not(next_finished))
  next_prediction_len = _tensor_gather_helper(
      gather_indices=next_beam_ids,
      gather_from=beam_state.lengths,
      batch_size=batch_size,
      range_size=beam_width,
      gather_shape=[-1])
  next_prediction_len += lengths_to_add

  # Pick out the cell_states according to the next_beam_ids. We use a
  # different gather_shape here because the cell_state tensors, i.e.
  # the tensors that would be gathered from, all have dimension
  # greater than two and we need to preserve those dimensions.
  # pylint: disable=g-long-lambda
  next_cell_state = nest.map_structure(
      lambda gather_from: _maybe_tensor_gather_helper(
          gather_indices=next_beam_ids,
          gather_from=gather_from,
          batch_size=batch_size,
          range_size=beam_width,
          gather_shape=[batch_size * beam_width, -1]),
      next_cell_state)
  # pylint: enable=g-long-lambda

  next_state = BeamSearchDecoderState(
      cell_state=next_cell_state,
      log_probs=next_beam_probs,
      lengths=next_prediction_len,
      finished=next_finished)

  output = BeamSearchDecoderOutput(
      scores=next_beam_scores,
      predicted_ids=next_word_ids,
      parent_ids=next_beam_ids)

  return output, next_state
def sufficient_statistics(x, axes, shift=True, keep_dims=False, name=None):
  """Calculate the sufficient statistics for the mean and variance of `x`.

  These sufficient statistics are computed using the one pass algorithm on
  an input that's optionally shifted using the value of the 1st element in `x`.
  See:
  https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Computing_shifted_data

  Args:
    x: A `Tensor`.
    axes: Array of ints. Axes along which to compute mean and variance.
    shift: If true, shift the data to provide more numerically stable results.
    keep_dims: produce statistics with the same dimensionality as the input.
    name: Name used to scope the operations that compute the sufficient stats.

  Returns:
    Four `Tensor` objects of the same type as `x`:
    * the count (number of elements to average over).
    * the (possibly shifted) sum of the elements in the array.
    * the (possibly shifted) sum of squares of the elements in the array.
    * the shift by which the mean must be corrected or None if `shift` is
      False.
  """
  with ops.op_scope([x, axes], name, "sufficient_statistics"):
    x = ops.convert_to_tensor(x, name="x")
    x_shape = x.get_shape()
    if x_shape.is_fully_defined():
      counts = 1
      m_shape = []
      for d in xrange(x_shape.ndims):
        dim = x_shape[d].value
        if d in set(axes):
          counts *= dim
          dim = 1
        m_shape.append(dim)
      counts = constant_op.constant(counts, dtype=x.dtype)
    else:  # shape needs to be inferred at runtime.
      x_shape = array_ops.shape(x)
      select_axes = sparse_ops.sparse_to_dense(axes, array_ops.shape(x_shape),
                                               True, False)
      m_shape = math_ops.select(select_axes, array_ops.ones_like(x_shape),
                                x_shape)
      counts = math_ops.cast(
          math_ops.reduce_prod(x_shape / m_shape),
          x.dtype,
          name="count")
    if shift:
      shift_value = array_ops.slice(x, array_ops.zeros_like(m_shape), m_shape)
      m_ss = math_ops.sub(x, shift_value)
      v_ss = math_ops.squared_difference(x, shift_value)
      if keep_dims:
        shift_value = array_ops.identity(shift_value, name="shift")
      else:
        shift_value = array_ops.squeeze(shift_value,
                                        squeeze_dims=axes,
                                        name="shift")
    else:  # not shift.
      m_ss = x
      v_ss = math_ops.square(x)
      shift_value = None
    m_ss = math_ops.reduce_sum(m_ss, axes, keep_dims=keep_dims, name="mean_ss")
    v_ss = math_ops.reduce_sum(v_ss, axes, keep_dims=keep_dims, name="var_ss")
  return counts, m_ss, v_ss, shift_value
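# Illustrative sketch (plain numpy, not from the original source): recovering
# the mean and variance from the shifted sufficient statistics
# (count, m_ss, v_ss, shift) returned above:
#   mean = shift + m_ss / count
#   var  = v_ss / count - (m_ss / count)**2
import numpy as np

stats_x = np.array([3., 5., 7., 9.])
shift_val = stats_x[0]
count = stats_x.size
m_ss = np.sum(stats_x - shift_val)
v_ss = np.sum((stats_x - shift_val) ** 2)
mean = shift_val + m_ss / count
var = v_ss / count - (m_ss / count) ** 2
assert np.isclose(mean, stats_x.mean()) and np.isclose(var, stats_x.var())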
def _subdiv_calculate_mean_and_var(self, x, axes, keep_dims):

  with K.name_scope('moments'):
    # The dynamic range of fp16 is too limited to support the collection of
    # sufficient statistics. As a workaround we simply perform the operations
    # on 32-bit floats before converting the mean and variance back to fp16
    y = math_ops.cast(x, dtypes.float32) if x.dtype == dtypes.float16 else x

    replica_ctx = ds.get_replica_context()
    if replica_ctx:
      # Sums local to this replica.
      local_sum = math_ops.reduce_sum(y, axis=axes, keepdims=True)
      local_squared_sum = math_ops.reduce_sum(
          math_ops.square(y), axis=axes, keepdims=True)
      batch_size = math_ops.cast(array_ops.shape_v2(y)[0], dtypes.float32)
      # TODO(b/163099951): batch the all-reduces once we sort out the ordering
      # issue for NCCL. We don't have a mechanism to launch NCCL in the same
      # order in each replica nowadays, so we limit NCCL to batch all-reduces.
      # Get the sum over all replicas (converge all devices).
      y_sum = replica_ctx.all_reduce(reduce_util.ReduceOp.SUM, local_sum)
      # Get the squared sum over all replicas (converge all devices).
      y_squared_sum = replica_ctx.all_reduce(
          reduce_util.ReduceOp.SUM, local_squared_sum)
      # Get the net batch size from all devices (converge all devices).
      input_batch_size = replica_ctx.all_reduce(
          reduce_util.ReduceOp.SUM, batch_size)
      # tf.print(replica_ctx.replica_id_in_sync_group,
      #          replica_ctx.num_replicas_in_sync, batch_size,
      #          self.aggregated_square_sum_batch, axes)

      # Get the number of total params you are averaging (local).
      axes_vals = [(array_ops.shape_v2(y))[i] for i in range(1, len(axes))]
      multiplier_ = math_ops.cast(
          math_ops.reduce_prod(axes_vals), dtypes.float32)
      multiplier = multiplier_ * input_batch_size

      # Convert to mean and variance (locally).
      mean = y_sum / multiplier
      y_squared_mean = y_squared_sum / multiplier
      # var = E(x^2) - E(x)^2
      variance = y_squared_mean - math_ops.square(mean)
      net_sum = y_sum / multiplier_
      squared_mean = y_squared_sum / multiplier_
    else:
      # mean = math_ops.reduce_mean(y, axes, keepdims=True, name='mean')
      # # sample variance, not unbiased variance
      # # Note: stop_gradient does not change the gradient that gets
      # #       backpropagated to the mean from the variance calculation,
      # #       because that gradient is zero
      # variance = math_ops.reduce_mean(
      #     math_ops.squared_difference(y, array_ops.stop_gradient(mean)),
      #     axes,
      #     keepdims=True,
      #     name='variance')
      net_sum = math_ops.reduce_sum(y, axis=axes, keepdims=True)
      squared_mean = math_ops.reduce_sum(
          math_ops.square(y), axis=axes, keepdims=True)

      if self._support_zero_size_input():
        # Keras assumes that batch dimension is the first dimension for Batch
        # Normalization.
        input_batch_size = array_ops.shape(y)[0]
      else:
        input_batch_size = None

      # Get the number of total params you are averaging, including the
      # batch size (local).
      axes_vals = [(array_ops.shape_v2(y))[i] for i in range(1, len(axes))]
      multiplier = math_ops.cast(
          math_ops.reduce_prod(axes_vals), dtypes.float32)

      squared_mean = squared_mean / multiplier
      net_sum = net_sum / multiplier

      if input_batch_size is None:
        mean, variance = nn.moments(y, axes, keep_dims=True)
        input_batch_size = 0
      else:
        batches_ = math_ops.cast(input_batch_size, self._param_dtype)
        # If you only have one replica, don't worry about it.
        # Compute true mean while keeping the dims for proper broadcasting.
        mean = net_sum / batches_
        variance = squared_mean / batches_ - math_ops.square(mean)

    input_batch_size = math_ops.cast(input_batch_size, dtypes.int32)
    if not keep_dims:
      mean = array_ops.squeeze(mean, axes)
      net_sum = array_ops.squeeze(net_sum, axes)
      variance = array_ops.squeeze(variance, axes)
      squared_mean = array_ops.squeeze(squared_mean, axes)
    if x.dtype == dtypes.float16:
      return (math_ops.cast(mean, dtypes.float16),
              math_ops.cast(net_sum, dtypes.float16),
              math_ops.cast(variance, dtypes.float16),
              math_ops.cast(squared_mean, dtypes.float16), input_batch_size)
    else:
      return (mean, net_sum, variance, squared_mean, input_batch_size)
def _fft_size_for_grad(grad, rank):
  return _math_ops.reduce_prod(_array_ops.shape(grad)[-rank:])
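# Illustrative sketch (plain numpy): for a rank-`r` FFT, the gradient scaling
# uses the product of the last `r` dimensions of `grad`, which is exactly what
# the helper above computes with reduce_prod over a shape slice.
import numpy as np

grad_shape, fft_rank = (8, 4, 16), 2
fft_size = np.prod(grad_shape[-fft_rank:])
assert fft_size == 64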
def f(x):
  pointwise = math_ops.sin(x) * math_ops.tan(x)
  return math_ops.reduce_prod(
      pointwise + math_ops.reduce_sum(pointwise), axis=1)
def fun(x):
  return math_ops.reduce_prod(math_ops.tanh(x)**2)
def _determinant(self):
  reduction_indices = [-(i + 1) for i in range(self.block_depth)]
  det = math_ops.reduce_prod(
      self.spectrum, reduction_indices=reduction_indices)
  return math_ops.cast(det, self.dtype)
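# Illustrative sketch (plain numpy, block_depth=1 case): a circulant matrix is
# diagonalized by the DFT, so its determinant is the product of its spectrum.
# That is the identity the reduce_prod over block dimensions above relies on.
import numpy as np

circ_c = np.array([4., 1., 0., 1.])  # first column of the circulant matrix
circ_n = circ_c.size
# C[i, j] = circ_c[(i - j) % n]: column j is circ_c cyclically shifted by j.
circ_C = np.stack([np.roll(circ_c, j) for j in range(circ_n)], axis=1)
circ_spectrum = np.fft.fft(circ_c)
assert np.isclose(np.prod(circ_spectrum).real, np.linalg.det(circ_C))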
def _beam_search_step(time, logits, next_cell_state, beam_state, batch_size,
                      beam_width, end_token, length_penalty_weight):
  """Performs a single step of Beam Search Decoding.

  Args:
    time: Beam search time step, should start at 0. At time 0 we assume
      that all beams are equal and consider only the first beam for
      continuations.
    logits: Logits at the current time step. A tensor of shape
      `[batch_size, beam_width, vocab_size]`
    next_cell_state: The next state from the cell, e.g. an instance of
      AttentionWrapperState if the cell is attentional.
    beam_state: Current state of the beam search.
      An instance of `BeamSearchDecoderState`.
    batch_size: The batch size for this input.
    beam_width: Python int. The size of the beams.
    end_token: The int32 end token.
    length_penalty_weight: Float weight to penalize length. Disabled with 0.0.

  Returns:
    A new beam state.
  """
  static_batch_size = tensor_util.constant_value(batch_size)

  # Calculate the current lengths of the predictions
  prediction_lengths = beam_state.lengths
  previously_finished = beam_state.finished

  # Calculate the total log probs for the new hypotheses
  # Final Shape: [batch_size, beam_width, vocab_size]
  step_log_probs = nn_ops.log_softmax(logits)
  # step_log_probs: Tensor, shape=(?, 10, 56136)
  step_log_probs = _mask_probs(step_log_probs, end_token, previously_finished)
  # step_log_probs_masked: shape=(?, 10, 56136)
  total_probs = array_ops.expand_dims(beam_state.log_probs, 2) + step_log_probs
  # total_probs: shape=(?, 10, 56136)

  # Calculate the continuation lengths by adding to all continuing beams.
  vocab_size = logits.shape[-1].value or array_ops.shape(logits)[-1]
  lengths_to_add = array_ops.one_hot(
      indices=array_ops.tile(
          array_ops.reshape(end_token, [1, 1]), [batch_size, beam_width]),
      depth=vocab_size,
      on_value=constant_op.constant(0, dtype=dtypes.int64),
      off_value=constant_op.constant(1, dtype=dtypes.int64),
      dtype=dtypes.int64)
  # lengths_to_add: shape=(?, 10, 56136)
  add_mask = (1 - math_ops.to_int64(previously_finished))
  # add_mask: shape=(?, 10), dtype=int64
  lengths_to_add = array_ops.expand_dims(add_mask, 2) * lengths_to_add
  # lengths_to_add: shape=(?, 10, 56136)
  new_prediction_lengths = (
      lengths_to_add + array_ops.expand_dims(prediction_lengths, 2))
  # new_prediction_lengths: shape=(?, 10, 56136)

  # Calculate the scores for each beam
  scores = _get_scores(
      log_probs=total_probs,
      sequence_lengths=new_prediction_lengths,
      length_penalty_weight=length_penalty_weight)

  scores_mask = tf.constant([step_log_probs.dtype.min, 0],
                            dtype=dtypes.float32,
                            shape=[vocab_size],
                            name='mask')
  scores_masked = tf.add(scores, scores_mask)
  scores_mask2 = tf.constant([0, 0, 0, 0, 0, step_log_probs.dtype.min, 0],
                             dtype=dtypes.float32,
                             shape=[vocab_size],
                             name='mask2')
  scores_masked = tf.add(scores_mask2, scores_masked)

  def new_scores(scores_masked):
    scores_no_stop = tf.constant([0, 0, step_log_probs.dtype.min, 0],
                                 dtype=dtypes.float32,
                                 shape=[vocab_size],
                                 name='no_stop')
    return tf.add(scores_masked, scores_no_stop)

  # Constrain the length.
  scores = control_flow_ops.cond(
      # time < 9,
      time < 0,
      lambda: new_scores(scores_masked),
      lambda: scores_masked)
  # scores: shape=(?, 10, 56136), i.e. [batch_size, beam_width, vocab_size]

  time = ops.convert_to_tensor(time, name="time")
  # During the first time step we only consider the initial beam
  scores_shape = array_ops.shape(scores)
  # scores_shape: shape=(3,)
  scores_to_flat_1 = array_ops.reshape(scores, [batch_size, 2, -1])
  print("scores_to_flat_1", scores_to_flat_1)
  scores_to_0 = scores[:, 0]
  scores_to_1 = scores[:, -1]
  scores_to_flat_2 = tf.concat([scores_to_0, scores_to_1], 1)
  scores_flat = control_flow_ops.cond(
      time > 0,
      lambda: scores_to_flat_1,
      lambda: array_ops.reshape(scores_to_flat_2, [batch_size, 2, -1]))
  num_available_beam = control_flow_ops.cond(
      time > 0, lambda: math_ops.reduce_prod(scores_shape[1:]),
      lambda: math_ops.reduce_prod(scores_shape[2:]))
  # scores_flat: shape=(?, ?)
  # num_available_beam: shape=()

  # Pick the next beams according to the specified successors function
  next_beam_size = math_ops.minimum(
      ops.convert_to_tensor(beam_width, dtype=dtypes.int32, name="beam_width"),
      num_available_beam)
  # scores_t = tf.reshape(scores_flat, [batch_size, 2, -1])

  ############################
  # input_words = ['entrencheds01', 'entrencheds02', 'forgev01', 'forgev04',
  #                'hitn02', 'hitn03', 'vaultn02', 'vaultn04', 'deepa03',
  #                'deeps02', 'admitv01', 'admitv02', 'plantn01', 'plantn02',
  #                'squaren01', 'squaren05', 'drawv05', 'drawv06', 'spellv03',
  #                'spellv02', 'shotn02', 'shotn04', 'coachv01', 'coachv02',
  #                'casen05', 'casen09', 'focusn01', 'focusn02', 'tasten01',
  #                'tasten04', 'footn01', 'footv01']
  input_words = get_words()
  return_list = prior_scores(input_words)
  return_array = np.array(return_list)
  return_tensor = tf.convert_to_tensor(return_array)
  tiling = [1, 5, 1]
  prior_mask = tf.tile(tf.expand_dims(return_tensor, 1), tiling)
  prior_mask = tf.cast(prior_mask, tf.float32)
  prior_mask = array_ops.reshape(prior_mask, [batch_size, -1])
  # print("prior_mask", prior_mask)
  scores_sum = tf.reduce_sum(scores_to_flat_1, 1)
  # print("scores_sum_1", scores_sum)
  # def cal_scores_sum(scores_sum, prior_mask):
  #   return tf.add(scores_sum, prior_mask)
  # scores_sum = control_flow_ops.cond(
  #     time > 0,
  #     lambda: cal_scores_sum(scores_sum, prior_mask),
  #     lambda: scores_sum)
  # scores_sum = tf.add(scores_sum, prior_mask)
  # print("scores_sum_2", scores_sum)
  ############################
  # scores_final = tf.concat([scores_sum, scores_sum], 1)

  def cal_scores_indices(scores_to_0, scores_to_1):
    next_beam_scores_1, word_indices_1 = nn_ops.top_k(scores_to_0, k=5)
    print("ori next_beam_scores_1,word_indices_1", next_beam_scores_1)
    print("ori word_indices_1", word_indices_1)
    next_beam_scores_2, word_indices_2 = nn_ops.top_k(scores_to_1, k=5)
    next_beam_scores = tf.concat([next_beam_scores_1, next_beam_scores_2], 1)
    word_indices = tf.concat(
        [word_indices_1, word_indices_2 + 9 * vocab_size], 1)
    return next_beam_scores, word_indices

  def cal_scores_indices_t1(scores_final, next_beam_size):
    next_beam_scores_1, word_indices_1 = nn_ops.top_k(scores_final, k=5)
    # next_beam_scores_1, word_indices_1 = sample(next_beam_scores_1,
    #                                             word_indices_1)
    print("next_beam_scores_1", next_beam_scores_1)
    print("word_indices_1", word_indices_1)
    next_beam_scores = tf.concat([next_beam_scores_1, next_beam_scores_1], 1)
    word_indices = tf.concat(
        [word_indices_1, word_indices_1 + 5 * vocab_size], 1)
    return next_beam_scores, word_indices

  next_beam_scores, word_indices = control_flow_ops.cond(
      time > 0,
      lambda: cal_scores_indices_t1(scores_sum, next_beam_size),
      lambda: cal_scores_indices(scores_to_0, scores_to_1))

  next_beam_scores.set_shape([static_batch_size, beam_width])
  word_indices.set_shape([static_batch_size, beam_width])
  # shape=(?, ?)

  # Pick out the probs, beam_ids, and states according to the chosen
  # predictions
  next_beam_probs = _tensor_gather_helper(
      gather_indices=word_indices,
      gather_from=total_probs,
      batch_size=batch_size,
      range_size=beam_width * vocab_size,
      gather_shape=[-1],
      name="next_beam_probs")
  # Note: just doing the following
  #   math_ops.to_int32(word_indices % vocab_size,
  #       name="next_beam_word_ids")
  # would be a lot cleaner but for reasons unclear, that hides the results of
  # the op which prevents capturing it with tfdbg debug ops.
  raw_next_word_ids = math_ops.mod(word_indices, vocab_size,
                                   name="next_beam_word_ids")
  # raw_next_word_ids: shape=(?, 10)
  next_word_ids = math_ops.to_int32(raw_next_word_ids)
  next_beam_ids = math_ops.to_int32(word_indices / vocab_size,
                                    name="next_beam_parent_ids")

  # Append new ids to current predictions
  previously_finished = _tensor_gather_helper(
      gather_indices=next_beam_ids,
      gather_from=previously_finished,
      batch_size=batch_size,
      range_size=beam_width,
      gather_shape=[-1])
  next_finished = math_ops.logical_or(previously_finished,
                                      math_ops.equal(next_word_ids, end_token),
                                      name="next_beam_finished")

  # Calculate the length of the next predictions.
  # 1. Finished beams remain unchanged
  # 2. Beams that are now finished (EOS predicted) remain unchanged
  # 3. Beams that are not yet finished have their length increased by 1
  lengths_to_add = math_ops.to_int64(
      math_ops.not_equal(next_word_ids, end_token))
  lengths_to_add = (1 - math_ops.to_int64(next_finished)) * lengths_to_add
  next_prediction_len = _tensor_gather_helper(
      gather_indices=next_beam_ids,
      gather_from=beam_state.lengths,
      batch_size=batch_size,
      range_size=beam_width,
      gather_shape=[-1])
  next_prediction_len += lengths_to_add

  # Pick out the cell_states according to the next_beam_ids. We use a
  # different gather_shape here because the cell_state tensors, i.e.
  # the tensors that would be gathered from, all have dimension
  # greater than two and we need to preserve those dimensions.
  # pylint: disable=g-long-lambda
  next_cell_state = nest.map_structure(
      lambda gather_from: _maybe_tensor_gather_helper(
          gather_indices=next_beam_ids,
          gather_from=gather_from,
          batch_size=batch_size,
          range_size=beam_width,
          gather_shape=[batch_size * beam_width, -1]),
      next_cell_state)
  # pylint: enable=g-long-lambda

  next_state = BeamSearchDecoderState(
      cell_state=next_cell_state,
      log_probs=next_beam_probs,
      lengths=next_prediction_len,
      finished=next_finished)

  print('next_beam_probs', next_beam_probs)

  output = BeamSearchDecoderOutput(
      scores=next_beam_scores,
      predicted_ids=next_word_ids,
      parent_ids=next_beam_ids)

  return output, next_state
def test(self):
  result_lt = ops.reduce_prod(self.original_lt, {'channel'})
  golden_lt = core.LabeledTensor(
      math_ops.reduce_prod(self.original_lt.tensor, 1),
      [self.a0, self.a2, self.a3])
  self.assertLabeledTensorsEqual(result_lt, golden_lt)
def _sample_n(self, n, seed=None): with ops.control_dependencies(self._assertions): n = ops.convert_to_tensor(n, name="n") static_n = tensor_util.constant_value(n) n = int(static_n) if static_n is not None else n cat_samples = self.cat.sample(n, seed=seed) static_samples_shape = cat_samples.get_shape() if static_samples_shape.is_fully_defined(): samples_shape = static_samples_shape.as_list() samples_size = static_samples_shape.num_elements() else: samples_shape = array_ops.shape(cat_samples) samples_size = array_ops.size(cat_samples) static_batch_shape = self.batch_shape if static_batch_shape.is_fully_defined(): batch_shape = static_batch_shape.as_list() batch_size = static_batch_shape.num_elements() else: batch_shape = self.batch_shape_tensor() batch_size = math_ops.reduce_prod(batch_shape) static_event_shape = self.event_shape if static_event_shape.is_fully_defined(): event_shape = np.array(static_event_shape.as_list(), dtype=np.int32) else: event_shape = self.event_shape_tensor() # Get indices into the raw cat sampling tensor. We will # need these to stitch sample values back out after sampling # within the component partitions. samples_raw_indices = array_ops.reshape( math_ops.range(0, samples_size), samples_shape) # Partition the raw indices so that we can use # dynamic_stitch later to reconstruct the samples from the # known partitions. partitioned_samples_indices = data_flow_ops.dynamic_partition( data=samples_raw_indices, partitions=cat_samples, num_partitions=self.num_components) # Copy the batch indices n times, as we will need to know # these to pull out the appropriate rows within the # component partitions. batch_raw_indices = array_ops.reshape( array_ops.tile(math_ops.range(0, batch_size), [n]), samples_shape) # Explanation of the dynamic partitioning below: # batch indices are i.e., [0, 1, 0, 1, 0, 1] # Suppose partitions are: # [1 1 0 0 1 1] # After partitioning, batch indices are cut as: # [batch_indices[x] for x in 2, 3] # [batch_indices[x] for x in 0, 1, 4, 5] # i.e. # [1 1] and [0 0 0 0] # Now we sample n=2 from part 0 and n=4 from part 1. # For part 0 we want samples from batch entries 1, 1 (samples 0, 1), # and for part 1 we want samples from batch entries 0, 0, 0, 0 # (samples 0, 1, 2, 3). partitioned_batch_indices = data_flow_ops.dynamic_partition( data=batch_raw_indices, partitions=cat_samples, num_partitions=self.num_components) samples_class = [None for _ in range(self.num_components)] for c in range(self.num_components): n_class = array_ops.size(partitioned_samples_indices[c]) seed = distribution_util.gen_new_seed(seed, "mixture") samples_class_c = self.components[c].sample(n_class, seed=seed) # Pull out the correct batch entries from each index. # To do this, we may have to flatten the batch shape. # For sample s, batch element b of component c, we get the # partitioned batch indices from # partitioned_batch_indices[c]; and shift each element by # the sample index. The final lookup can be thought of as # a matrix gather along locations (s, b) in # samples_class_c where the n_class rows correspond to # samples within this component and the batch_size columns # correspond to batch elements within the component. # # Thus the lookup index is # lookup[c, i] = batch_size * s[i] + b[c, i] # for i = 0 ... n_class[c] - 1. 
lookup_partitioned_batch_indices = ( batch_size * math_ops.range(n_class) + partitioned_batch_indices[c]) samples_class_c = array_ops.reshape( samples_class_c, array_ops.concat([[n_class * batch_size], event_shape], 0)) samples_class_c = array_ops.gather( samples_class_c, lookup_partitioned_batch_indices, name="samples_class_c_gather") samples_class[c] = samples_class_c # Stitch back together the samples across the components. lhs_flat_ret = data_flow_ops.dynamic_stitch( indices=partitioned_samples_indices, data=samples_class) # Reshape back to proper sample, batch, and event shape. ret = array_ops.reshape( lhs_flat_ret, array_ops.concat( [samples_shape, self.event_shape_tensor()], 0)) ret.set_shape( tensor_shape.TensorShape(static_samples_shape).concatenate( self.event_shape)) return ret
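# A minimal, self-contained sketch (TF2 eager, illustration only; the toy
# `cat_samples` values are hypothetical) of the dynamic_partition /
# dynamic_stitch bookkeeping used in `_sample_n` above: slot indices are
# partitioned by sampled component id, values are produced per component,
# then stitched back into their original slots.
import tensorflow as tf

cat_samples = tf.constant([1, 0, 1, 0])       # component chosen for each slot
raw_indices = tf.range(tf.size(cat_samples))  # slot ids 0..3
parts = tf.dynamic_partition(raw_indices, cat_samples, num_partitions=2)
# parts[0] == [1, 3], parts[1] == [0, 2]
data = [tf.fill(tf.shape(p), c) for c, p in enumerate(parts)]  # fake samples
stitched = tf.dynamic_stitch(parts, data)
# stitched == [1, 0, 1, 0]: every slot received a value from its own
# component, in its original position.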
def lu_reconstruct(lower_upper, perm, validate_args=False, name=None):
  """Reconstructs one or more matrices from their LU decomposition(s).

  Args:
    lower_upper: `lu` as returned by `tf.linalg.lu`, i.e., if
      `matmul(P, matmul(L, U)) = X` then `lower_upper = L + U - eye`.
    perm: `p` as returned by `tf.linalg.lu`, i.e., if
      `matmul(P, matmul(L, U)) = X` then `perm = argmax(P)`.
    validate_args: Python `bool` indicating whether arguments should be checked
      for correctness.
      Default value: `False` (i.e., don't validate arguments).
    name: Python `str` name given to ops managed by this object.
      Default value: `None` (i.e., 'lu_reconstruct').

  Returns:
    x: The original input to `tf.linalg.lu`, i.e., `x` as in,
      `lu_reconstruct(*tf.linalg.lu(x))`.

  #### Examples

  ```python
  import numpy as np
  import tensorflow as tf
  import tensorflow_probability as tfp

  x = [[[3., 4], [1, 2]],
       [[7., 8], [3, 4]]]
  x_reconstructed = tf.linalg.lu_reconstruct(*tf.linalg.lu(x))
  tf.assert_near(x, x_reconstructed)
  # ==> True
  ```
  """
  with ops.name_scope(name or 'lu_reconstruct'):
    lower_upper = ops.convert_to_tensor(
        lower_upper, dtype_hint=dtypes.float32, name='lower_upper')
    perm = ops.convert_to_tensor(perm, dtype_hint=dtypes.int32, name='perm')

    assertions = lu_reconstruct_assertions(lower_upper, perm, validate_args)
    if assertions:
      with ops.control_dependencies(assertions):
        lower_upper = array_ops.identity(lower_upper)
        perm = array_ops.identity(perm)

    shape = array_ops.shape(lower_upper)

    lower = set_diag(
        band_part(lower_upper, num_lower=-1, num_upper=0),
        array_ops.ones(shape[:-1], dtype=lower_upper.dtype))
    upper = band_part(lower_upper, num_lower=0, num_upper=-1)
    x = math_ops.matmul(lower, upper)

    if (lower_upper.shape is None or lower_upper.shape.rank is None or
        lower_upper.shape.rank != 2):
      # We either don't know the batch rank or there are >0 batch dims.
      batch_size = math_ops.reduce_prod(shape[:-2])
      d = shape[-1]
      x = array_ops.reshape(x, [batch_size, d, d])
      perm = array_ops.reshape(perm, [batch_size, d])
      perm = map_fn.map_fn(array_ops.invert_permutation, perm)
      batch_indices = array_ops.broadcast_to(
          math_ops.range(batch_size)[:, array_ops.newaxis],
          [batch_size, d])
      x = array_ops.gather_nd(
          x, array_ops.stack([batch_indices, perm], axis=-1))
      x = array_ops.reshape(x, shape)
    else:
      x = array_ops.gather(x, array_ops.invert_permutation(perm))

    x.set_shape(lower_upper.shape)
    return x
def testEmptyGradients(self): with self.session(use_gpu=True): x = array_ops.zeros([0, 3]) y = math_ops.reduce_prod(x, [1]) error = gradient_checker.compute_gradient_error(x, [0, 3], y, [0]) self.assertEqual(error, 0)
def indicator(x): x1_times_x2 = math_ops.reduce_prod(x, axis=[-1]) return 0.5 * (math_ops.sign(x1_times_x2) + 1.0)
def call(self, inputs, mask=None):
  if not isinstance(inputs, list):
    raise ValueError('A merge layer should be called on a list of inputs.')
  if self._reshape_required:
    reshaped_inputs = []
    input_ndims = list(map(K.ndim, inputs))
    if None not in input_ndims:
      # If ranks of all inputs are available,
      # we simply expand each of them at axis=1
      # until all of them have the same rank.
      max_ndim = max(input_ndims)
      for x in inputs:
        x_ndim = K.ndim(x)
        for _ in range(max_ndim - x_ndim):
          x = array_ops.expand_dims(x, axis=1)
        reshaped_inputs.append(x)
      return self._merge_function(reshaped_inputs)
    else:
      # Transpose all inputs so that batch size is the last dimension.
      # (batch_size, dim1, dim2, ... ) -> (dim1, dim2, ... , batch_size)
      transposed = False
      for x in inputs:
        x_ndim = K.ndim(x)
        if x_ndim is None:
          x_shape = array_ops.shape(x)
          batch_size = x_shape[0]
          new_shape = K.concatenate(
              [x_shape[1:],
               array_ops.expand_dims(batch_size, axis=-1)])
          x_transposed = array_ops.reshape(
              x,
              array_ops.stack(
                  [batch_size, math_ops.reduce_prod(x_shape[1:])], axis=0))
          x_transposed = array_ops.transpose(x_transposed, perm=(1, 0))
          x_transposed = array_ops.reshape(x_transposed, new_shape)
          reshaped_inputs.append(x_transposed)
          transposed = True
        elif x_ndim > 1:
          dims = list(range(1, x_ndim)) + [0]
          reshaped_inputs.append(array_ops.transpose(x, perm=dims))
          transposed = True
        else:
          # We don't transpose inputs if they are 1D vectors or scalars.
          reshaped_inputs.append(x)
      y = self._merge_function(reshaped_inputs)
      y_ndim = K.ndim(y)
      if transposed:
        # If inputs have been transposed, we have to transpose the output too.
        if y_ndim is None:
          y_shape = array_ops.shape(y)
          y_ndim = array_ops.shape(y_shape)[0]
          batch_size = y_shape[y_ndim - 1]
          new_shape = K.concatenate([
              array_ops.expand_dims(batch_size, axis=-1),
              y_shape[:y_ndim - 1]
          ])
          y = array_ops.reshape(y, (-1, batch_size))
          y = array_ops.transpose(y, perm=(1, 0))
          y = array_ops.reshape(y, new_shape)
        elif y_ndim > 1:
          dims = [y_ndim - 1] + list(range(y_ndim - 1))
          y = array_ops.transpose(y, perm=dims)
      return y
  else:
    return self._merge_function(inputs)
def run_test_sample_consistent_log_prob(self,
                                        sess_run_fn,
                                        dist,
                                        num_samples=int(1e5),
                                        num_threshold=int(1e3),
                                        seed=42,
                                        batch_size=None,
                                        rtol=1e-2,
                                        atol=0.):
  """Tests that sample/log_prob are consistent with each other.

  "Consistency" means that `sample` and `log_prob` correspond to the same
  distribution.

  Note: this code only verifies a necessary condition for consistency--it
  does not verify sufficiency, hence it does not prove that `sample` and
  `log_prob` truly are consistent.

  Args:
    sess_run_fn: Python `callable` taking `list`-like of `Tensor`s and
      returning a list of results after running one "step" of TensorFlow
      computation, typically set to `sess.run`.
    dist: Distribution instance or object which implements `sample`,
      `log_prob`, `event_shape_tensor` and `batch_shape_tensor`.
    num_samples: Python `int` scalar indicating the number of Monte-Carlo
      samples to draw from `dist`.
    num_threshold: Python `int` scalar indicating the number of samples a
      bucket must contain before being compared to the probability.
      Default value: 1e3; must be at least 1.
      Warning: setting this too high will cause the test to falsely pass,
      while setting it too low will cause the test to falsely fail.
    seed: Python `int` indicating the seed to use when sampling from `dist`.
      In general it is not recommended to use `None` in a test as this
      increases the likelihood of spurious test failure.
    batch_size: Hint for unpacking result of samples. Default: `None` means
      batch_size is inferred.
    rtol: Python `float`-type indicating the admissible relative error between
      analytical and sample statistics.
    atol: Python `float`-type indicating the admissible absolute error between
      analytical and sample statistics.

  Raises:
    ValueError: if `num_threshold < 1`.
  """
  if num_threshold < 1:
    raise ValueError(
        "num_threshold({}) must be at least 1.".format(num_threshold))
  # Histogram only supports vectors so we call it once per batch coordinate.
  y = dist.sample(num_samples, seed=seed)
  y = array_ops.reshape(y, shape=[num_samples, -1])
  if batch_size is None:
    batch_size = math_ops.reduce_prod(dist.batch_shape_tensor())
  batch_dims = array_ops.shape(dist.batch_shape_tensor())[0]
  edges_expanded_shape = 1 + array_ops.pad([-2], paddings=[[0, batch_dims]])
  for b, x in enumerate(array_ops.unstack(y, num=batch_size, axis=1)):
    counts, edges = self.histogram(x)
    edges = array_ops.reshape(edges, edges_expanded_shape)
    probs = math_ops.exp(dist.log_prob(edges))
    probs = array_ops.reshape(probs, shape=[-1, batch_size])[:, b]

    [counts_, probs_] = sess_run_fn([counts, probs])
    valid = counts_ > num_threshold
    probs_ = probs_[valid]
    counts_ = counts_[valid]
    self.assertAllClose(probs_, counts_ / num_samples,
                        rtol=rtol, atol=atol)
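# A minimal sketch (NumPy only, illustration of the same idea, not the test
# harness above): bucket Monte-Carlo samples into a histogram and compare
# each well-populated bucket's empirical frequency against the analytical
# bucket mass. The N(0, 1) target and the loose tolerance are assumptions
# chosen to absorb Monte-Carlo and midpoint-rule error.
import numpy as np

rng = np.random.default_rng(42)
num_samples, num_threshold = 100000, 1000
x = rng.standard_normal(num_samples)
counts, edges = np.histogram(x, bins=20)
widths = np.diff(edges)
centers = 0.5 * (edges[:-1] + edges[1:])
# Density at the bucket center times bucket width approximates bucket mass.
probs = np.exp(-0.5 * centers**2) / np.sqrt(2 * np.pi) * widths
valid = counts > num_threshold  # only compare well-populated buckets
assert np.allclose(probs[valid], counts[valid] / num_samples, rtol=0.1)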
def tensors_to_item(self, keys_to_tensors): item = self._handler.tensors_to_item(keys_to_tensors) return control_flow_ops.cond( pred=math_ops.equal(math_ops.reduce_prod(array_ops.shape(item)), 0), true_fn=lambda: self._backup.tensors_to_item(keys_to_tensors), false_fn=lambda: item)
def fill_lower_triangular(x, validate_args=False,
                          name="fill_lower_triangular"):
  """Creates a (batch of) lower triangular matrix from a vector of inputs.

  If `x.get_shape()` is `[b1, b2, ..., bK, d]` then the output shape is `[b1,
  b2, ..., bK, n, n]` where `n` is such that `d = n(n+1)/2`, i.e.,
  `n = int(0.5 * (math.sqrt(1. + 8. * d) - 1.))`.

  Although the non-batch complexity is O(n**2), large constants and
  sub-optimal vectorization means the complexity of this function is 5x
  slower than zeroing out the upper triangular, i.e.,
  `tf.matrix_band_part(X, -1, 0)`. This function becomes competitive only
  when several matmul/cholesky/etc ops can be elided in constructing the
  input. Example: wiring a fully connected layer as a covariance matrix; this
  function reduces the final layer by 2x and possibly reduces the network
  arch complexity considerably. In most cases it is better to simply build a
  full matrix and zero out the upper triangular elements, e.g.,
  `tril = tf.matrix_band_part(full, -1, 0)`, rather than directly construct a
  lower triangular.

  Example:

  ```python
  fill_lower_triangular([1, 2, 3, 4, 5, 6])
  # Returns: [[1, 0, 0],
  #           [2, 3, 0],
  #           [4, 5, 6]]
  ```

  For comparison, a pure numpy version of this function can be found in
  `distribution_util_test.py`, function `_fill_lower_triangular`.

  Args:
    x: `Tensor` representing lower triangular elements.
    validate_args: Python `bool`, default `False`. Whether to ensure the shape
      of `x` can be mapped to a lower triangular matrix (controls non-static
      checks only).
    name: Python `str`. The name to give this op.

  Returns:
    tril: `Tensor` with lower triangular elements filled from `x`.

  Raises:
    ValueError: if `x` has a static shape which cannot be mapped to a lower
      triangular matrix.
  """
  # TODO(jvdillon): Replace this code with dedicated op when it exists.
  with ops.name_scope(name, values=[x]):
    x = ops.convert_to_tensor(x, name="x")
    if (x.get_shape().ndims is not None and
        x.get_shape()[-1].value is not None):
      d = x.get_shape()[-1].value
      # d = n(n+1)/2 implies n is:
      n = int(0.5 * (math.sqrt(1. + 8. * d) - 1.))
      d_inferred = n * (n + 1) / 2
      if d != d_inferred:
        raise ValueError("Input cannot be mapped to a lower triangular; "
                         "n*(n+1)/2 = %d != %d" % (d_inferred, d))
      final_shape = x.get_shape()[:-1].concatenate(
          tensor_shape.TensorShape([n, n]))
    else:
      d = math_ops.cast(array_ops.shape(x)[-1], dtype=dtypes.float32)
      # d = n(n+1)/2 implies n is:
      n = math_ops.cast(0.5 * (math_ops.sqrt(1. + 8. * d) - 1.),
                        dtype=dtypes.int32)
      if validate_args:
        is_valid_input_shape = check_ops.assert_equal(
            n * (n + 1) / 2, d,
            message="Input cannot be mapped to a lower triangular.")
        n = control_flow_ops.with_dependencies([is_valid_input_shape], n)
      final_shape = x.get_shape()[:-1].concatenate(
          tensor_shape.TensorShape([None, None]))

    def tril_ids(n):
      """Internal helper to create vector of linear indices into y."""
      # Build the ids statically; chose 512 because it implies 1MiB.
      if not tensor_util.is_tensor(n) and n <= 512:
        ids = np.arange(n**2, dtype=np.int32)
        rows = (ids / n).astype(np.int32)  # Implicit floor.
        # We need to stop incrementing the index when we encounter
        # upper-triangular elements. The idea here is to compute the
        # lower-right number of zeros then by "symmetry" subtract this from
        # the total number of zeros, n(n-1)/2.
        # Then we note that: n(n-1)/2 - (n-r)*(n-r-1)/2 = r(2n-r-1)/2
        offset = (rows * (2 * n - rows - 1) / 2).astype(np.int32)
        # We could also zero out when (rows < cols) == (rows < ids-n*rows).
# mask = (ids <= (n + 1) * rows).astype(np.int32) else: ids = math_ops.range(n**2) rows = math_ops.cast(ids / n, dtype=dtypes.int32) offset = math_ops.cast(rows * (2 * n - rows - 1) / 2, dtype=dtypes.int32) return ids - offset # Special-case non-batch case. if x.get_shape().ndims == 1: y = array_ops.gather(x, array_ops.reshape(tril_ids(n), [n, n])) y = array_ops.matrix_band_part(y, -1, 0) y.set_shape(y.get_shape().merge_with(final_shape)) return y # Make ids for each batch dim. if (x.get_shape().ndims is not None and x.get_shape()[:-1].is_fully_defined()): batch_shape = np.asarray(x.get_shape()[:-1].as_list(), dtype=np.int32) m = np.prod(batch_shape).astype(np.int32) else: batch_shape = array_ops.shape(x)[:-1] m = math_ops.reduce_prod(array_ops.shape(x)[:-1]) batch_ids = math_ops.range(m) # Assemble the tril_ids into batch,tril_id pairs. idx = array_ops.stack([ array_ops.tile(array_ops.expand_dims(batch_ids, 1), [1, n * n]), array_ops.tile(array_ops.expand_dims(tril_ids(n), 0), [m, 1]) ]) idx = array_ops.transpose(idx, [1, 2, 0]) # Gather up, reshape, and return. y = array_ops.reshape(x, [-1, d]) y = array_ops.gather_nd(y, idx) y = array_ops.reshape(y, array_ops.concat([batch_shape, [n, n]], 0)) y = array_ops.matrix_band_part(y, -1, 0) y.set_shape(y.get_shape().merge_with(final_shape)) return y
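# A short NumPy check (illustration only, not library code) of the index math
# used by `tril_ids` above: for row r of an n x n matrix, the number of
# upper-triangular slots skipped so far is r(2n - r - 1)/2, so subtracting
# that offset from the linear id yields a position in the length-d input
# vector, d = n(n+1)/2.
import numpy as np

n = 3
ids = np.arange(n**2)
rows = ids // n
offset = rows * (2 * n - rows - 1) // 2
tril_ids = (ids - offset).reshape(n, n)
# tril_ids == [[0, 1, 2],
#              [1, 2, 3],
#              [3, 4, 5]]
# Gathering and keeping the lower band reproduces the docstring example.
x = np.array([1, 2, 3, 4, 5, 6])
print(np.tril(x[tril_ids]))  # [[1 0 0], [2 3 0], [4 5 6]]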
def batch_index(vectors, indices, name=None): """Indexes into a batch of vectors. Args: vectors: An N-D Tensor. indices: A K-D integer Tensor, K <= N. The first K - 1 dimensions of indices must be broadcastable to the first N - 1 dimensions of vectors. name: A name for this operation (optional). Returns: An N-D Tensor comprised of one element selected from each of the vectors. Example usage: vectors = [[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [1, 2, 3]]] batch_index(vectors, 0) => [[1, 4], [7, 1]] batch_index(vectors, [0]) => [[[1], [4]], [[7], [1]]] batch_index(vectors, [0, 0, 2, 2]) => [[[1, 1, 3, 3], [4, 4, 6, 6]], [[7, 7, 9, 9], [1, 1, 3, 3]]] batch_index(vectors, [[0, 0, 2, 2], [0, 1, 2, 0]]) => [[[1, 1, 3, 3], [4, 5, 6, 4]], [[7, 7, 9, 9], [1, 2, 3, 1]]] """ with ops.op_scope([vectors, indices], name, "BatchIndex"): vectors = ops.convert_to_tensor(vectors, name="vectors") vectors_shape = array_ops.shape(vectors) vectors_rank = array_ops.size(vectors_shape) indices = ops.convert_to_tensor(indices, name="indices") indices_shape = array_ops.shape(indices) indices_rank = array_ops.size(indices_shape) # Support scalar indices. indices_are_scalar = None indices_are_scalar_tensor = math_ops.equal(0, indices_rank) if indices.get_shape().ndims is not None: indices_are_scalar = indices.get_shape().ndims == 0 if indices_are_scalar is None: indices, num_selected = control_flow_ops.cond( indices_are_scalar_tensor, lambda: [array_ops.expand_dims(indices, 0), # pylint: disable=g-long-lambda array_ops.constant(1, dtype=indices_shape.dtype)], lambda: [indices, array_ops.gather(indices_shape, indices_rank - 1)]) elif indices_are_scalar: num_selected = 1 indices = array_ops.expand_dims(indices, 0) else: num_selected = array_ops.gather(indices_shape, indices_rank - 1) # The batch shape is the first N-1 dimensions of `vectors`. batch_shape = array_ops.slice( vectors_shape, [0], array_ops.pack([vectors_rank - 1])) batch_size = math_ops.reduce_prod(batch_shape) # Broadcast indices to have shape `batch_shape + [num_selected]` bcast_shape = array_ops.concat(0, [batch_shape, [1]]) bcast_indices = indices + array_ops.zeros(bcast_shape, dtype=indices.dtype) # At this point, the first N-1 dimensions of `vectors` and # `bcast_indices` agree, and we're almost ready to call # `gather_nd`. But first we need to assign each index to a batch, # and we do that below by counting up to `batch_size`, repeating # each element `num_selected` times. batch_count = array_ops.tile( array_ops.expand_dims(math_ops.range(batch_size), 1), array_ops.pack([1, num_selected])) batch_count.set_shape([vectors.get_shape()[:-1].num_elements(), indices.get_shape()[-1]]) # Flatten the batch dimensions and gather. nd_indices = array_ops.concat( 1, [array_ops.reshape(batch_count, [-1, 1]), array_ops.reshape(bcast_indices, [-1, 1])]) nd_batches = array_ops.reshape(vectors, array_ops.pack([batch_size, -1])) ret = array_ops.gather_nd(nd_batches, nd_indices) # Reshape the output. if indices_are_scalar is None: ret = control_flow_ops.cond( indices_are_scalar_tensor, lambda: array_ops.reshape(ret, batch_shape), lambda: array_ops.reshape( # pylint: disable=g-long-lambda ret, array_ops.concat( 0, [batch_shape, array_ops.expand_dims(num_selected, 0)]))) elif indices_are_scalar: ret = array_ops.reshape(ret, batch_shape) ret.set_shape(vectors.get_shape()[:-1]) else: ret = array_ops.reshape( ret, array_ops.concat( 0, [batch_shape, array_ops.expand_dims(num_selected, 0)])) ret.set_shape(vectors.get_shape()[:-1] .concatenate(indices.get_shape()[-1:])) return ret
def _determinant(self): axis = [-(i + 1) for i in range(self.block_depth)] det = math_ops.reduce_prod(self.spectrum, axis=axis) return math_ops.cast(det, self.dtype)
def _sample_n(self, n, seed=None):
  x = self.distribution.sample(
      sample_shape=concat_vectors(
          [n],
          self.batch_shape_tensor(),
          self.event_shape_tensor()),
      seed=seed)   # shape: [n, B, e]
  x = [aff.forward(x) for aff in self.endpoint_affine]

  # Get ids as a [n, batch_size]-shaped matrix, unless batch_shape=[] then
  # get ids as a [n]-shaped vector.
  batch_size = self.batch_shape.num_elements()
  if batch_size is None:
    batch_size = math_ops.reduce_prod(self.batch_shape_tensor())
  mix_batch_size = self.mixture_distribution.batch_shape.num_elements()
  if mix_batch_size is None:
    mix_batch_size = math_ops.reduce_prod(
        self.mixture_distribution.batch_shape_tensor())
  ids = self.mixture_distribution.sample(
      sample_shape=concat_vectors(
          [n],
          distribution_util.pick_vector(
              self.is_scalar_batch(),
              np.int32([]),
              [batch_size // mix_batch_size])),
      seed=distribution_util.gen_new_seed(
          seed, "vector_diffeomixture"))

  # We need to flatten batch dims in case mixture_distribution has its own
  # batch dims.
  ids = array_ops.reshape(ids, shape=concat_vectors(
      [n],
      distribution_util.pick_vector(
          self.is_scalar_batch(),
          np.int32([]),
          np.int32([-1]))))

  # Stride `components * quadrature_size` for `batch_size` number of times.
  stride = self.grid.shape.with_rank_at_least(2)[-2:].num_elements()
  if stride is None:
    stride = math_ops.reduce_prod(
        array_ops.shape(self.grid)[-2:])
  offset = math_ops.range(start=0,
                          limit=batch_size * stride,
                          delta=stride,
                          dtype=ids.dtype)

  weight = array_ops.gather(
      array_ops.reshape(self.grid, shape=[-1]),
      ids + offset)
  # At this point, weight flattened all batch dims into one.
  # We also need to append a singleton to broadcast with event dims.
  if self.batch_shape.is_fully_defined():
    new_shape = [-1] + self.batch_shape.as_list() + [1]
  else:
    new_shape = array_ops.concat(
        ([-1], self.batch_shape_tensor(), [1]), axis=0)
  weight = array_ops.reshape(weight, shape=new_shape)

  if len(x) != 2:
    # We actually should have already triggered this exception. However as a
    # policy we're putting this exception wherever we exploit the bimixture
    # assumption.
    raise NotImplementedError("Currently only bimixtures are supported; "
                              "len(scale)={} is not 2.".format(len(x)))

  # Alternatively:
  #   x = weight * x[0] + (1. - weight) * x[1]
  x = weight * (x[0] - x[1]) + x[1]

  return x
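# A minimal NumPy sketch (illustration only; the toy `grid` and `ids` values
# are hypothetical) of the strided offset gather used for `weight` above:
# with a flattened grid laid out as [batch, stride] and one per-batch id in
# [0, stride), adding offsets 0, stride, 2*stride, ... converts the per-batch
# ids into indices over the flattened grid.
import numpy as np

batch_size, stride = 3, 4
grid = np.arange(batch_size * stride).reshape(batch_size, stride)
ids = np.array([1, 0, 3])                           # one id per batch member
offset = np.arange(0, batch_size * stride, stride)  # [0, 4, 8]
weight = grid.reshape(-1)[ids + offset]             # -> [1, 4, 11]
# Equivalent to a per-row gather on the unflattened grid:
assert np.all(weight == grid[np.arange(batch_size), ids])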
def safe_embedding_lookup_sparse(embedding_weights,
                                 sparse_ids,
                                 sparse_weights=None,
                                 combiner=None,
                                 default_id=None,
                                 name=None,
                                 partition_strategy="div"):
  """Lookup embedding results, accounting for invalid IDs and empty features.

  The partitioned embedding in `embedding_weights` must all be the same shape
  except for the first dimension. The first dimension is allowed to vary as
  the vocabulary size is not necessarily a multiple of `P`.
  `embedding_weights` may be a `PartitionedVariable` as returned by using
  `tf.get_variable()` with a partitioner.

  Invalid IDs (< 0) are pruned from input IDs and weights, as well as any IDs
  with non-positive weight. For an entry with no features, the embedding
  vector for `default_id` is returned, or the 0-vector if `default_id` is not
  supplied.

  The ids and weights may be multi-dimensional. Embeddings are always
  aggregated along the last dimension.

  Args:
    embedding_weights: A list of `P` float tensors or values representing
      partitioned embedding tensors. Alternatively, a `PartitionedVariable`,
      created by partitioning along dimension 0. The total unpartitioned shape
      should be `[e_0, e_1, ..., e_m]`, where `e_0` represents the vocab size
      and `e_1, ..., e_m` are the embedding dimensions.
    sparse_ids: `SparseTensor` of shape `[d_0, d_1, ..., d_n]` containing the
      ids. `d_0` is typically batch size.
    sparse_weights: `SparseTensor` of same shape as `sparse_ids`, containing
      float weights corresponding to `sparse_ids`, or `None` if all weights
      are assumed to be 1.0.
    combiner: A string specifying how to combine embedding results for each
      entry. Currently "mean", "sqrtn" and "sum" are supported, with "mean"
      the default.
    default_id: The id to use for an entry with no features.
    name: A name for this operation (optional).
    partition_strategy: A string specifying the partitioning strategy.
      Currently `"div"` and `"mod"` are supported. Default is `"div"`.

  Returns:
    Dense tensor of shape `[d_0, d_1, ..., d_{n-1}, e_1, ..., e_m]`.

  Raises:
    ValueError: if `embedding_weights` is empty.
  """
  if combiner is None:
    logging.warn("The default value of combiner will change from \"mean\" "
                 "to \"sqrtn\" after 2016/11/01.")
    combiner = "mean"
  if embedding_weights is None or len(embedding_weights) < 1:
    raise ValueError("Missing embedding_weights %s." % embedding_weights)

  dtype = sparse_weights.dtype if sparse_weights is not None else None
  if isinstance(embedding_weights, variables.PartitionedVariable):
    embedding_weights = list(embedding_weights)
  embedding_weights = [
      ops.convert_to_tensor(w, dtype=dtype) for w in embedding_weights
  ]

  contrib_tensor_util.assert_same_float_dtype(embedding_weights +
                                              [sparse_weights])

  with ops.name_scope(name, "embedding_lookup",
                      embedding_weights + [sparse_ids,
                                           sparse_weights]) as scope:
    # Reshape higher-rank sparse ids and weights to linear segment ids.
    original_shape = sparse_ids.shape
    original_rank_dim = sparse_ids.shape.get_shape()[0]
    original_rank = (
        array_ops.size(original_shape)
        if original_rank_dim.value is None
        else original_rank_dim.value)
    sparse_ids = sparse_ops.sparse_reshape(sparse_ids, [
        math_ops.reduce_prod(
            array_ops.slice(original_shape, [0], [original_rank - 1])),
        array_ops.gather(original_shape, original_rank - 1)
    ])
    if sparse_weights is not None:
      sparse_weights = ops.SparseTensor(sparse_ids.indices,
                                        sparse_weights.values,
                                        sparse_ids.shape)

    # Prune invalid ids and weights.
    sparse_ids, sparse_weights = _prune_invalid_ids(sparse_ids,
                                                    sparse_weights)

    # Fill in dummy values for empty features, if necessary.
sparse_ids, is_row_empty = sparse_ops.sparse_fill_empty_rows( sparse_ids, default_id or 0) if sparse_weights is not None: sparse_weights, _ = sparse_ops.sparse_fill_empty_rows( sparse_weights, 1.0) result = embedding_ops.embedding_lookup_sparse( embedding_weights, sparse_ids, sparse_weights, combiner=combiner, partition_strategy=partition_strategy, name=None if default_id is None else scope) if default_id is None: # Broadcast is_row_empty to the same shape as embedding_lookup_result, # for use in Select. is_row_empty = array_ops.tile( array_ops.reshape(is_row_empty, [-1, 1]), array_ops.pack([1, array_ops.shape(result)[1]])) result = math_ops.select(is_row_empty, array_ops.zeros_like(result), result, name=scope) # Reshape back from linear ids back into higher-dimensional dense result. final_result = array_ops.reshape( result, array_ops.concat(0, [ array_ops.slice(math_ops.cast(original_shape, dtypes.int32), [0], [original_rank - 1]), array_ops.slice(array_ops.shape(result), [1], [-1]) ])) final_result.set_shape( tensor_shape.unknown_shape( (original_rank_dim - 1).value).concatenate( result.get_shape()[1:])) return final_result
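# A small usage sketch (hedged; uses the public TF2 equivalent
# `tf.nn.safe_embedding_lookup_sparse` rather than the contrib symbol above,
# and toy values of my own choosing): a row with no features comes back as
# the 0-vector when `default_id` is unset.
import tensorflow as tf

weights = tf.reshape(tf.range(12, dtype=tf.float32), [4, 3])  # vocab 4, dim 3
sparse_ids = tf.sparse.SparseTensor(
    indices=[[0, 0], [0, 1], [2, 0]],   # row 1 has no entries
    values=tf.constant([1, 3, 2], dtype=tf.int64),
    dense_shape=[3, 2])
emb = tf.nn.safe_embedding_lookup_sparse([weights], sparse_ids)
# emb[0] is the mean of weights[1] and weights[3]; emb[1] == [0., 0., 0.].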
def _MatrixSquareRootGrad(op, grad): """Gradient for MatrixSquareRoot.""" # Let A be an m x m square matrix (or batch of matrices) # Let R = sqrtm(A) # By definition, A = RR # Take the differential: dA = d(RR) = RdR + dRR # Solve the resulting Sylvester equation for dR # Used to find Kronecker products within the Sylvester equation def _KroneckerProduct(b1, b2): """Computes the Kronecker product of two batches of square matrices.""" b1_shape = array_ops.shape(b1) b2_shape = array_ops.shape(b2) b1_order = b1_shape[-1] b2_order = b2_shape[-1] shape_slice_size = [math_ops.subtract(array_ops.size(b1_shape), 2)] shape_slice = array_ops.slice( b1_shape, [0], shape_slice_size) # Same for both batches b1_reshape_shape = array_ops.concat( [shape_slice, [b1_order], [1], [b1_order], [1]], 0) b2_reshape_shape = array_ops.concat( [shape_slice, [1], [b2_order], [1], [b2_order]], 0) b1_reshape = array_ops.reshape(b1, b1_reshape_shape) b2_reshape = array_ops.reshape(b2, b2_reshape_shape) order_prod = b1_order * b2_order kprod_shape = array_ops.concat( [shape_slice, [order_prod], [order_prod]], 0) return array_ops.reshape(b1_reshape * b2_reshape, kprod_shape) sqrtm = op.outputs[0] # R shape = array_ops.shape(sqrtm) order = shape[-1] # m matrix_count = math_ops.reduce_prod(shape[0:-2]) # Get batch of m x m identity matrices eye = linalg_ops.eye(order, dtype=sqrtm.dtype) # m x m identity matrix eye_flat = array_ops.reshape(eye, [-1]) eye_tiled = array_ops.tile(eye_flat, [matrix_count]) eye_batch = array_ops.reshape(eye_tiled, shape) # The transpose of R is taken in the k1 term instead of k2 in # order to prevent redundant transposition of R (i.e. (R')' = R) sqrtm_transpose = array_ops.matrix_transpose(sqrtm) k1 = _KroneckerProduct(eye_batch, sqrtm_transpose) k2 = _KroneckerProduct(sqrtm, eye_batch) ksum = math_ops.add(k1, k2) # Vectorize dA shape_slice_size = [math_ops.subtract(array_ops.size(shape), 2)] shape_slice = array_ops.slice(shape, [0], shape_slice_size) shape_vec_da = array_ops.concat([shape_slice, [order * order], [1]], 0) vec_da = array_ops.reshape(array_ops.matrix_transpose(grad), shape_vec_da) # Solve for vec(dR) vec_dsqrtm = linalg_ops.matrix_solve(ksum, vec_da) # Solve for dR by inverse vectorizing vec(dR) dsqrtm_transpose = array_ops.reshape(vec_dsqrtm, shape) return array_ops.matrix_transpose(dsqrtm_transpose)
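# A NumPy verification sketch (illustration only, not library code) of the
# identity the comment above relies on: for A = R @ R, the differential is
# dA = R dR + dR R, and with row-major vectorization (which is what
# `array_ops.reshape` uses) this becomes
#   (kron(R, I) + kron(I, R^T)) vec(dR) = vec(dA),
# so vec(dR) is recoverable by a linear solve. The random test matrices are
# assumptions; the operator is generically invertible for them.
import numpy as np

rng = np.random.default_rng(0)
m = 3
r = rng.standard_normal((m, m))   # plays the role of sqrtm(A)
dr = rng.standard_normal((m, m))  # an arbitrary perturbation
da = r @ dr + dr @ r              # forward differential of A = R @ R
eye = np.eye(m)
ksum = np.kron(r, eye) + np.kron(eye, r.T)
vec_dr = np.linalg.solve(ksum, da.reshape(-1))  # row-major vec of dA
assert np.allclose(vec_dr, dr.reshape(-1))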
def _determinant(self): return math_ops.reduce_prod(self._get_diag(), axis=[-1])
def _determinant(self): return math_ops.reduce_prod(self._diag, reduction_indices=[-1])
def safe_embedding_lookup_sparse(
    embedding_weights,
    sparse_ids,
    sparse_weights=None,
    combiner="mean",
    default_id=None,
    name="safe_embedding_lookup_sparse",
    partition_strategy=None,  # not used
    max_norm=None,
    return_trainable=False):
  """Provides a dynamic version of `tf.nn.safe_embedding_lookup_sparse`.

  Lookup embedding results, accounting for empty features and invalid
  weights.

  All IDs are treated as valid, including non-positive IDs.
  Invalid weights (<= 0) are pruned from input weights, as well as any IDs
  with non-positive weight. For an entry with no features, the embedding
  vector for `default_id` is returned, or the 0-vector if `default_id` is not
  supplied.

  The ids and weights may be multi-dimensional. Embeddings are always
  aggregated along the last dimension.

  Args:
    embedding_weights: A single `dynamic_embedding.Variable` instance
      representing the complete embedding tensor.
    sparse_ids: `SparseTensor` of shape `[d_0, d_1, ..., d_n]` containing the
      ids. `d_0` is typically batch size.
    sparse_weights: `SparseTensor` of same shape as `sparse_ids`, containing
      float weights corresponding to `sparse_ids`, or `None` if all weights
      are assumed to be 1.0.
    combiner: A string specifying how to combine embedding results for each
      entry. Currently "mean", "sqrtn" and "sum" are supported, with "mean"
      the default.
    default_id: The id to use for an entry with no features.
    name: A name for this operation (optional).
    partition_strategy: A string specifying the partitioning strategy.
      Currently `"div"` and `"mod"` are supported. Default is `"div"`.
    max_norm: If not `None`, all embeddings are l2-normalized to max_norm
      before combining.

  Returns:
    combined_embeddings:
      A dense `Tensor` of shape `[d_0, d_1, ..., d_{n-1}, e_1, ..., e_m]`.
    trainable_wrap:
      A TrainableWrapper object used to fill the Optimizers `var_list`.
      Only provided if `return_trainable` is True.

  Raises:
    ValueError: if `embedding_weights` is empty.
  """
  if embedding_weights is None:
    raise ValueError("Missing embedding_weights %s." % embedding_weights)

  if embedding_weights.key_dtype != sparse_ids.dtype:
    raise TypeError(
        "embedding_weights.key_dtype should be same with sparse_ids.dtype: "
        "{} vs. {}".format(embedding_weights.key_dtype, sparse_ids.dtype))

  weights_dtype = sparse_weights.dtype if sparse_weights is not None else None
  if weights_dtype and embedding_weights.value_dtype != weights_dtype:
    raise TypeError(
        "embedding_weights.value_dtype should be same with "
        "sparse_weights.dtype: {} vs. {}".format(
            embedding_weights.value_dtype, weights_dtype))

  scope = variable_scope.get_variable_scope()
  full_name = scope.name + "/" + name if scope.name else name
  with ops.name_scope(full_name + "/"):
    # Reshape higher-rank sparse ids and weights to linear segment ids.
    original_shape = sparse_ids.dense_shape
    original_rank_dim = tensor_shape.dimension_value(
        sparse_ids.dense_shape.get_shape()[0])
    original_rank = (
        array_ops.size(original_shape)
        if original_rank_dim is None else original_rank_dim)
    sparse_ids = sparse_ops.sparse_reshape(sparse_ids, [
        math_ops.reduce_prod(
            array_ops.slice(original_shape, [0], [original_rank - 1])),
        array_ops.gather(original_shape, original_rank - 1)
    ])
    if sparse_weights is not None:
      sparse_weights = sparse_tensor.SparseTensor(sparse_ids.indices,
                                                  sparse_weights.values,
                                                  sparse_ids.dense_shape)

    # Prune invalid weights.
    if combiner != "sum":
      sparse_ids, sparse_weights = _prune_invalid_weights(
          sparse_ids, sparse_weights)

    # Fill in dummy values for empty features, if necessary.
sparse_ids, is_row_empty = sparse_ops.sparse_fill_empty_rows( sparse_ids, default_id or 0) if sparse_weights is not None: sparse_weights, _ = sparse_ops.sparse_fill_empty_rows( sparse_weights, 1.0) result, trainable_ = embedding_lookup_sparse( embedding_weights, sparse_ids, sparse_weights, combiner=combiner, partition_strategy=partition_strategy, name=name + "/embedding_lookup_sparse", max_norm=max_norm, return_trainable=True) if default_id is None: # Broadcast is_row_empty to the same shape as embedding_lookup_result, # for use in Select. is_row_empty = array_ops.tile( array_ops.reshape(is_row_empty, [-1, 1]), array_ops.stack([1, array_ops.shape(result)[1]])) result = array_ops.where(is_row_empty, array_ops.zeros_like(result), result, name="where") # Reshape back from linear ids back into higher-dimensional dense result. final_result = array_ops.reshape( result, array_ops.concat([ array_ops.slice(math_ops.cast(original_shape, dtypes.int32), [0], [original_rank - 1]), array_ops.slice(array_ops.shape(result), [1], [-1]) ], 0)) final_result.set_shape( tensor_shape.unknown_shape( (tensor_shape.Dimension(original_rank_dim) - 1).value).concatenate(result.get_shape()[1:])) return (final_result, trainable_) if return_trainable else final_result
def _FFTSizeForGrad(grad, rank): return math_ops.reduce_prod( array_ops.slice(array_ops.reverse(array_ops.shape(grad), (True, )), (0, ), (rank, )))
def bincount(arr,
             weights=None,
             minlength=None,
             maxlength=None,
             dtype=dtypes.int32,
             name=None,
             axis=None,
             binary_output=False):
  """Counts the number of occurrences of each value in an integer array.

  If `minlength` and `maxlength` are not given, returns a vector with length
  `tf.reduce_max(arr) + 1` if `arr` is non-empty, and length 0 otherwise.
  If `weights` are non-None, then index `i` of the output stores the sum of
  the value in `weights` at each index where the corresponding value in `arr`
  is `i`.

  ```python
  values = tf.constant([1,1,2,3,2,4,4,5])
  tf.math.bincount(values) #[0 2 2 1 2 1]
  ```

  The maximum element in `values` is 5, so the output vector has length
  5 + 1 = 6. Each bin value in the output indicates the number of occurrences
  of the particular index. Here, index 1 in the output has a value 2,
  indicating that the value 1 occurs two times in `values`.

  ```python
  values = tf.constant([1,1,2,3,2,4,4,5])
  weights = tf.constant([1,5,0,1,0,5,4,5])
  tf.math.bincount(values, weights=weights) #[0 6 0 1 9 5]
  ```

  Each bin is incremented by the corresponding weight instead of 1.
  Here, index 1 in the output has a value 6, the sum of the weights
  corresponding to the value 1 in `values`.

  **Bin-counting on a certain axis**

  This example takes a 2 dimensional input and returns a `Tensor` with
  bincounting on each sample.

  >>> data = np.array([[1, 2, 3, 0], [0, 0, 1, 2]], dtype=np.int32)
  >>> tf.math.bincount(data, axis=-1)
  <tf.Tensor: shape=(2, 4), dtype=int32, numpy=
    array([[1, 1, 1, 1],
           [2, 1, 1, 0]], dtype=int32)>

  **Bin-counting with binary_output**

  This example gives binary output instead of counting the occurrence.

  >>> data = np.array([[1, 2, 3, 0], [0, 0, 1, 2]], dtype=np.int32)
  >>> tf.math.bincount(data, axis=-1, binary_output=True)
  <tf.Tensor: shape=(2, 4), dtype=int32, numpy=
    array([[1, 1, 1, 1],
           [1, 1, 1, 0]], dtype=int32)>

  Args:
    arr: A Tensor, RaggedTensor, or SparseTensor whose values should be
      counted. These tensors must have a rank of 2 if `axis=-1`.
    weights: If non-None, must be the same shape as arr. For each value in
      `arr`, the bin will be incremented by the corresponding weight instead
      of 1.
    minlength: If given, ensures the output has length at least `minlength`,
      padding with zeros at the end if necessary.
    maxlength: If given, skips values in `arr` that are equal to or greater
      than `maxlength`, ensuring that the output has length at most
      `maxlength`.
    dtype: If `weights` is None, determines the type of the output bins.
    name: A name scope for the associated operations (optional).
    axis: The axis to slice over. Axes at and below `axis` will be flattened
      before bin counting. Currently, only `0`, and `-1` are supported. If
      None, all axes will be flattened (identical to passing `0`).
    binary_output: If True, this op will output 1 instead of the number of
      times a token appears (equivalent to one_hot + reduce_any instead of
      one_hot + reduce_sum). Defaults to False.

  Returns:
    A vector with the same dtype as `weights` or the given `dtype`. The bin
    values.

  Raises:
    `InvalidArgumentError` if negative values are provided as an input.
  """
  name = "bincount" if name is None else name
  with ops.name_scope(name):
    # Somehow forward compatible needs to be False.
if not binary_output and axis is None: arr = ops.convert_to_tensor(arr, name="arr", dtype=dtypes.int32) array_is_nonempty = math_ops.reduce_prod(array_ops.shape(arr)) > 0 output_size = math_ops.cast(array_is_nonempty, dtypes.int32) * ( math_ops.reduce_max(arr) + 1) if minlength is not None: minlength = ops.convert_to_tensor(minlength, name="minlength", dtype=dtypes.int32) output_size = gen_math_ops.maximum(minlength, output_size) if maxlength is not None: maxlength = ops.convert_to_tensor(maxlength, name="maxlength", dtype=dtypes.int32) output_size = gen_math_ops.minimum(maxlength, output_size) if weights is not None: weights = ops.convert_to_tensor(weights, name="weights") return gen_math_ops.unsorted_segment_sum( weights, arr, output_size) weights = constant_op.constant([], dtype) arr = array_ops.reshape(arr, [-1]) return gen_math_ops.bincount(arr, output_size, weights) if not isinstance(arr, sparse_tensor.SparseTensor): arr = ragged_tensor.convert_to_tensor_or_ragged_tensor(arr, name="arr") if weights is not None: if not isinstance(weights, sparse_tensor.SparseTensor): weights = ragged_tensor.convert_to_tensor_or_ragged_tensor( weights, name="weights") if weights is not None and binary_output: raise ValueError( "Arguments `binary_output` and `weights` are mutually " "exclusive. Please specify only one.") if not arr.dtype.is_integer: arr = math_ops.cast(arr, dtypes.int32) if axis is None: axis = 0 if axis not in [0, -1]: raise ValueError( f"Unsupported value for argument axis={axis}. Only 0 and" " -1 are currently supported.") if isinstance(arr, ragged_tensor.RaggedTensor): array_is_nonempty = math_ops.reduce_prod( array_ops.shape(arr.values)) > 0 else: array_is_nonempty = math_ops.reduce_prod(array_ops.shape(arr)) > 0 if isinstance(arr, sparse_tensor.SparseTensor): output_size = math_ops.cast(array_is_nonempty, arr.dtype) * ( math_ops.reduce_max(arr.values) + 1) else: output_size = math_ops.cast( array_is_nonempty, arr.dtype) * (math_ops.reduce_max(arr) + 1) if minlength is not None: minlength = ops.convert_to_tensor(minlength, name="minlength", dtype=arr.dtype) output_size = gen_math_ops.maximum(minlength, output_size) if maxlength is not None: maxlength = ops.convert_to_tensor(maxlength, name="maxlength", dtype=arr.dtype) output_size = gen_math_ops.minimum(maxlength, output_size) if axis == 0: if isinstance(arr, sparse_tensor.SparseTensor): if weights is not None: weights = validate_sparse_weights(arr, weights, dtype) arr = arr.values elif isinstance(arr, ragged_tensor.RaggedTensor): if weights is not None: weights = validate_ragged_weights(arr, weights, dtype) arr = arr.values else: if weights is not None: weights = array_ops.reshape(weights, [-1]) arr = array_ops.reshape(arr, [-1]) if isinstance(arr, sparse_tensor.SparseTensor): weights = validate_sparse_weights(arr, weights, dtype) return gen_math_ops.sparse_bincount(indices=arr.indices, values=arr.values, dense_shape=arr.dense_shape, size=output_size, weights=weights, binary_output=binary_output) elif isinstance(arr, ragged_tensor.RaggedTensor): weights = validate_ragged_weights(arr, weights, dtype) return gen_math_ops.ragged_bincount(splits=arr.row_splits, values=arr.values, size=output_size, weights=weights, binary_output=binary_output) else: weights = validate_dense_weights(arr, weights, dtype) return gen_math_ops.dense_bincount(input=arr, size=output_size, weights=weights, binary_output=binary_output)
def per_step_batch_loss(self, features, mode, state): """Computes predictions, losses, and intermediate model states. Args: features: A dictionary with times, values, and (optionally) exogenous regressors. See `define_loss`. mode: The tf.estimator.ModeKeys mode to use (TRAIN, EVAL, INFER). state: Model-dependent state, each with size [batch size x ...]. The number and type will typically be fixed by the model (for example a mean and variance). Returns: A tuple of (loss, filtered_states, predictions) loss: Average loss values across the batch. filtered_states: For each Tensor in `state` with shape [batch size x ...], `filtered_states` has a Tensor with shape [batch size x window size x ...] with filtered state for each part of the batch and window. predictions: A dictionary with model-dependent one-step-ahead (or at-least-one-step-ahead with missing values) predictions, with keys indicating the type of prediction and values having shape [batch size x window size x ...]. For example state space models provide "mean", "covariance", and "log_likelihood". """ self._check_graph_initialized() times = math_ops.cast(features[TrainEvalFeatures.TIMES], dtype=dtypes.int64) values = math_ops.cast(features[TrainEvalFeatures.VALUES], dtype=self.dtype) exogenous_regressors = self._process_exogenous_features( times=times, features={ key: value for key, value in features.items() if key not in [TrainEvalFeatures.TIMES, TrainEvalFeatures.VALUES] }) def _batch_loss_filtering_step(step_number, current_times, state): """Make a prediction and update it based on data.""" current_values = values[:, step_number, :] state = self._apply_exogenous_update( step_number=step_number, current_times=current_times, state=state, raw_features=features, embedded_exogenous_regressors=exogenous_regressors) predicted_state, predictions = self._prediction_step( current_times=current_times, state=state) filtered_state, outputs = self._filtering_step( current_times=current_times, current_values=current_values, state=predicted_state, predictions=predictions) return filtered_state, outputs state, outputs = self._state_update_loop( times=times, state=state, state_update_fn=_batch_loss_filtering_step, outputs=["loss"] + self._train_output_names) outputs["loss"].set_shape(times.get_shape()) loss_sum = math_ops.reduce_sum(outputs["loss"]) per_observation_loss = (loss_sum / math_ops.cast( math_ops.reduce_prod(array_ops.shape(times)), dtype=self.dtype)) per_observation_loss += self._loss_additions(times, values, mode) # Since we have window-level additions to the loss, its per-step value is # misleading, so we avoid returning it. del outputs["loss"] return per_observation_loss, state, outputs
def _tf_reduce(self, x, reduction_axes, keepdims): return math_ops.reduce_prod(x, reduction_axes, keepdims)