def _create_high_dimensional_ragged_dataset(self, strategy, include_weights=False, weight=0.5):
  """Builds a repeating dataset of high-dimensional ragged features.

  Args:
    strategy: A distribution strategy; its replica count determines the
      global batch size.
    include_weights: If True, pair the features with per-value constant
      weights of the same ragged shape.
    weight: The constant weight value used when `include_weights` is True.

  Returns:
    A `tf.data` dataset yielding the feature tuple (or a
    (features, weights) pair), batched to the global batch size.
  """
  ragged_features = (
      ragged_tensor.RaggedTensor(
          row_lengths=self.feature_watched_row_lengths_high_dimensional,
          values=self.feature_watched_values_high_dimensional),
      ragged_tensor.RaggedTensor(
          row_lengths=self.feature_favorited_row_lengths_high_dimensional,
          values=self.feature_favorited_values_high_dimensional),
      ragged_tensor.RaggedTensor(
          row_lengths=self.feature_friends_row_lengths_high_dimensional,
          values=self.feature_friends_values_high_dimensional))

  if include_weights:
    # Each weight tensor mirrors its feature's ragged shape, with every
    # value replaced by the constant `weight`.
    feature_weights = tuple(
        ragged_tensor.RaggedTensor(
            row_lengths=feature.row_lengths(),
            values=array_ops.ones_like(
                feature.values, dtype=dtypes.float32) * weight)
        for feature in ragged_features)
    ragged_features = (ragged_features, feature_weights)

  dataset = dataset_ops.DatasetV2.from_tensors(ragged_features)
  # Data is batched to self.data_batch_size; rebatch to the global batch size.
  global_batch_size = self.batch_size * strategy.num_replicas_in_sync
  return dataset.unbatch().repeat().batch(
      global_batch_size, drop_remainder=True)
def placeholder(dtype, ragged_rank, value_shape=None, name=None):
  """Creates a placeholder for a `tf.RaggedTensor` that will always be fed.

  **Important**: This ragged tensor will produce an error if evaluated.
  Its value must be fed using the `feed_dict` optional argument to
  `Session.run()`, `Tensor.eval()`, or `Operation.run()`.

  @compatibility{eager} Placeholders are not compatible with eager execution.

  Args:
    dtype: The data type for the `RaggedTensor`.
    ragged_rank: The ragged rank for the `RaggedTensor`.
    value_shape: The shape for individual flat values in the `RaggedTensor`.
    name: A name for the operation (optional).

  Returns:
    A `RaggedTensor` that may be used as a handle for feeding a value, but
    not evaluated directly.

  Raises:
    RuntimeError: if eager execution is enabled
  """
  # With no ragged dimensions this is just an ordinary dense placeholder.
  if ragged_rank == 0:
    return array_ops.placeholder(dtype, value_shape, name)

  with ops.name_scope(name, "RaggedPlaceholder", []):
    # Placeholder for the innermost (flat) values; the leading dimension
    # (total number of values) is unknown until fed.
    flat_shape = tensor_shape.TensorShape([None]).concatenate(value_shape)
    result = array_ops.placeholder(dtype, flat_shape, "flat_values")
    # Wrap one row_splits placeholder per ragged dimension, innermost first,
    # so the outermost splits end up on the outermost RaggedTensor.
    for level in range(ragged_rank - 1, -1, -1):
      level_splits = array_ops.placeholder(
          dtypes.int64, [None], "row_splits_%d" % level)
      result = ragged_tensor.RaggedTensor(result, level_splits, internal=True)
    return result
def from_row_limits(values, row_limits, name=None):
  """Creates a `RaggedTensor` with rows partitioned by `row_limits`.

  Equivalent to: `from_row_splits(values, concat([0, row_limits]))`.

  Args:
    values: A potentially ragged tensor with shape `[nvals, ...]`.
    row_limits: A 1-D int64 tensor with shape `[nrows]`.  Must be sorted in
      ascending order.  If `nrows>0`, then `row_limits[-1]` must be `nvals`.
    name: A name prefix for the RaggedTensor (optional).

  Returns:
    A `RaggedTensor`.  `result.rank = values.rank + 1`.
    `result.ragged_rank = values.ragged_rank + 1`.

  #### Example:
    ```python
    >>> rt = ragged.from_row_limits(
    ...     values=[3, 1, 4, 1, 5, 9, 2, 6],
    ...     row_limits=[4, 4, 7, 8, 8])
    >>> rt.eval().tolist()
    [[3, 1, 4, 1], [], [5, 9, 2], [6], []]
    ```
  """
  with ops.name_scope(name, 'RaggedFromRowLimits', [values, row_limits]):
    values_t = convert_to_tensor_or_ragged_tensor(values, name='values')
    limits_t = ops.convert_to_tensor(row_limits, dtypes.int64, 'row_limits')
    limits_t.shape.assert_has_rank(1)
    # Prepending a zero turns the (exclusive) row limits into row_splits.
    leading_zero = array_ops.zeros([1], dtypes.int64)
    splits_t = array_ops.concat([leading_zero, limits_t], axis=0)
    return ragged_tensor.RaggedTensor(
        values=values_t, row_splits=splits_t, internal=True)
def field_value(self, field_name):
  """See StructuredTensor.field_value for documentation."""
  # A field path (list/tuple of names) is resolved one step at a time,
  # recursing into the result of each lookup.
  if isinstance(field_name, (list, tuple)):
    result = self
    for path_step in field_name:
      result = result.field_value(path_step)
    return result
  # Wrap the child field's values with this tensor's own row partition,
  # forwarding every cached partition tensor so nothing is recomputed.
  return ragged_tensor.RaggedTensor(
      values=self._values.field_value(field_name),
      row_splits=self._row_splits,
      cached_row_lengths=self._row_lengths,
      cached_value_rowids=self._value_rowids,
      cached_nrows=self._nrows,
      uniform_row_length=self._uniform_row_length,
      internal=True)
def from_row_splits(values, row_splits, name=None):
  """Creates a `RaggedTensor` with rows partitioned by `row_splits`.

  The returned `RaggedTensor` corresponds with the python list defined by:

  ```python
  result = [values[row_splits[i]:row_splits[i + 1]]
            for i in range(len(row_splits) - 1)]
  ```

  Args:
    values: A potentially ragged tensor with shape `[nvals, ...]`.
    row_splits: A 1-D int64 tensor with shape `[nrows+1]`.  Must not be
      empty, and must be sorted in ascending order.  `row_splits[0]` must
      be zero and `row_splits[-1]` must be `nvals`.
    name: A name prefix for the RaggedTensor (optional).

  Returns:
    A `RaggedTensor`.  `result.rank = values.rank + 1`.
    `result.ragged_rank = values.ragged_rank + 1`.

  Raises:
    ValueError: If `row_splits` is an empty list.

  #### Example:
    ```python
    >>> rt = ragged.from_row_splits(
    ...     values=[3, 1, 4, 1, 5, 9, 2, 6],
    ...     row_splits=[0, 4, 4, 7, 8, 8])
    >>> rt.eval().tolist()
    [[3, 1, 4, 1], [], [5, 9, 2], [6], []]
    ```
  """
  # Catch an empty python sequence eagerly, before tensor conversion, so the
  # error points at the caller's mistake rather than a shape failure later.
  if isinstance(row_splits, (list, tuple)) and not row_splits:
    raise ValueError('row_splits tensor may not be empty.')
  with ops.name_scope(name, 'RaggedFromRowSplits', [values, row_splits]):
    values_t = convert_to_tensor_or_ragged_tensor(values, name='values')
    splits_t = ops.convert_to_tensor(row_splits, dtypes.int64, 'row_splits')
    splits_t.shape.assert_has_rank(1)
    return ragged_tensor.RaggedTensor(
        values=values_t, row_splits=splits_t, internal=True)
def from_row_lengths(values, row_lengths, name=None):
  """Creates a `RaggedTensor` with rows partitioned by `row_lengths`.

  The returned `RaggedTensor` corresponds with the python list defined by:

  ```python
  result = [[values.pop(0) for i in range(length)]
            for length in row_lengths]
  ```

  Args:
    values: A potentially ragged tensor with shape `[nvals, ...]`.
    row_lengths: A 1-D int64 tensor with shape `[nrows]`.  Must be
      nonnegative.  `sum(row_lengths)` must be `nvals`.
    name: A name prefix for the RaggedTensor (optional).

  Returns:
    A `RaggedTensor`.  `result.rank = values.rank + 1`.
    `result.ragged_rank = values.ragged_rank + 1`.

  #### Example:
    ```python
    >>> rt = ragged.from_row_lengths(
    ...     values=[3, 1, 4, 1, 5, 9, 2, 6],
    ...     row_lengths=[4, 0, 3, 1, 0])
    >>> rt.eval().tolist()
    [[3, 1, 4, 1], [], [5, 9, 2], [6], []]
    ```
  """
  with ops.name_scope(name, 'RaggedFromRowLengths', [values, row_lengths]):
    values_t = convert_to_tensor_or_ragged_tensor(values, name='values')
    lengths_t = ops.convert_to_tensor(row_lengths, dtypes.int64, 'row_lengths')
    lengths_t.shape.assert_has_rank(1)
    # The running total of lengths gives the row limits; prepending a zero
    # converts those limits into row_splits.
    limits_t = math_ops.cumsum(lengths_t)
    splits_t = array_ops.concat([[0], limits_t], axis=0)
    # Pass the lengths through as a cached partition so callers asking for
    # row_lengths() later don't recompute them.
    return ragged_tensor.RaggedTensor(
        values=values_t, row_splits=splits_t,
        cached_row_lengths=lengths_t, internal=True)
def from_value_rowids(values, value_rowids, nrows=None, name=None):
  """Creates a `RaggedTensor` with rows partitioned by `value_rowids`.

  The returned `RaggedTensor` corresponds with the python list defined by:

  ```python
  result = [[values[i] for i in range(len(values)) if value_rowids[i] == row]
            for row in range(nrows)]
  ```

  Warning: currently, this needs to cast value_rowids to int64 before
  converting, since `tf.bincount` only supports `int32`.

  Args:
    values: A potentially ragged tensor with shape `[nvals, ...]`.
    value_rowids: A 1-D int64 tensor with shape `[nvals]`, which corresponds
      one-to-one with `values`, and specifies each value's row index.  Must
      be nonnegative, and must be sorted in ascending order.
    nrows: An int64 scalar specifying the number of rows.  This should be
      specified if the `RaggedTensor` may contain empty trailing rows.  Must
      be greater than `value_rowids[-1]` (or zero if `value_rowids` is
      empty).  Defaults to `value_rowids[-1]` (or zero if `value_rowids` is
      empty).
    name: A name prefix for the RaggedTensor (optional).

  Returns:
    A `RaggedTensor`.  `result.rank = values.rank + 1`.
    `result.ragged_rank = values.ragged_rank + 1`.

  Raises:
    ValueError: If `nrows` is incompatible with `value_rowids`.

  #### Example:
    ```python
    >>> rt = ragged.from_value_rowids(
    ...     values=[3, 1, 4, 1, 5, 9, 2, 6],
    ...     value_rowids=[0, 0, 0, 0, 2, 2, 2, 3],
    ...     nrows=5)
    >>> rt.eval().tolist()
    [[3, 1, 4, 1], [], [5, 9, 2], [6], []]
    ```
  """
  with ops.name_scope(name, 'RaggedFromValueRowIds',
                      [values, value_rowids, nrows]):
    values = convert_to_tensor_or_ragged_tensor(values, name='values')
    value_rowids = ops.convert_to_tensor(value_rowids, dtypes.int64,
                                         name='value_rowids')
    if nrows is None:
      # Infer nrows from the last rowid, statically if possible.
      const_rowids = tensor_util.constant_value(value_rowids)
      if const_rowids is None:
        # Dynamic path: appending [-1] makes the concat nonempty even when
        # value_rowids is empty, so element [0] is value_rowids[-1] when it
        # exists and -1 otherwise; +1 then yields the row count (0 if empty).
        nrows = array_ops.concat([value_rowids[-1:], [-1]], axis=0)[0] + 1
        const_nrows = None
      else:
        # Static path: compute nrows as a python int and wrap it.
        const_nrows = const_rowids[-1] + 1 if const_rowids.size > 0 else 0
        nrows = ops.convert_to_tensor(const_nrows, dtypes.int64, name='nrows')
    else:
      nrows = ops.convert_to_tensor(nrows, dtypes.int64, 'nrows')
      const_nrows = tensor_util.constant_value(nrows)
      if const_nrows is not None:
        # Validate a statically-known nrows against statically-known rowids.
        if const_nrows < 0:
          raise ValueError('Expected nrows >= 0; got %d' % const_nrows)
        const_rowids = tensor_util.constant_value(value_rowids)
        if const_rowids is not None and const_rowids.size > 0:
          if not const_nrows >= const_rowids[-1] + 1:
            raise ValueError(
                'Expected nrows >= value_rowids[-1] + 1; got nrows=%d, '
                'value_rowids[-1]=%d' % (const_nrows, const_rowids[-1]))

    value_rowids.shape.assert_has_rank(1)
    nrows.shape.assert_has_rank(0)
    values.shape[:1].assert_is_compatible_with(value_rowids.shape)

    # Convert value_rowids & nrows to row_splits.
    # Note: we don't use segment_ids_to_row_splits() here because we want
    # to save the intermediate value `row_lengths`, so we can cache it.
    # TODO(b/116708836) Upgrade bincount to accept int64 so we can skip the
    # cast.  (Remove the warning in the docstring when we do.)
    value_rowids_int32 = math_ops.cast(value_rowids, dtypes.int32)
    nrows_int32 = math_ops.cast(nrows, dtypes.int32)
    # minlength == maxlength == nrows forces the bincount output to have
    # exactly nrows entries, including trailing empty rows.
    row_lengths = math_ops.bincount(
        value_rowids_int32,
        minlength=nrows_int32,
        maxlength=nrows_int32,
        dtype=dtypes.int64)
    row_splits = array_ops.concat([[0], math_ops.cumsum(row_lengths)], axis=0)
    if const_nrows is not None:
      # Propagate the statically-known row count into the result shapes.
      row_lengths.set_shape([const_nrows])
      row_splits.set_shape([const_nrows + 1])

    # Cache row_lengths / value_rowids / nrows on the result so later
    # queries for these partitions don't recompute them.
    return ragged_tensor.RaggedTensor(
        values,
        row_splits,
        cached_row_lengths=row_lengths,
        cached_value_rowids=value_rowids,
        cached_nrows=nrows,
        internal=True)