def test_empirical_statistics_accepts_masked_values(self):
    """Checks masked statistics against statistics of the kept steps only."""

    def assert_same_statistics(masked_input, reference_input):
        # Statistics computed through the mask must agree with statistics
        # of a series from which the masked steps were physically removed.
        got_mean, got_stddev, got_initial = self.evaluate(
            sts_util.empirical_statistics(masked_input))
        want_mean, want_stddev, want_initial = self.evaluate(
            sts_util.empirical_statistics(reference_input))
        self.assertAllClose(got_mean, want_mean)
        self.assertAllClose(got_stddev, want_stddev)
        self.assertAllClose(got_initial, want_initial)

    # Pairing a rank-1 mask with a batch of series exercises broadcasting
    # of the mask over the batch shape.
    batched_values = np.random.randn(3, 2, 5)
    step_mask = np.array([True, False, False, True, False])
    kept_steps = ~np.broadcast_to(step_mask, batched_values.shape)
    assert_same_statistics(
        missing_values_util.MaskedTimeSeries(
            time_series=batched_values, is_missing=step_mask),
        batched_values[kept_steps].reshape([3, 2, 3]))

    # Repeat the comparison for a single series with no batch shape.
    single_values = batched_values[0, 0, :]
    assert_same_statistics(
        missing_values_util.MaskedTimeSeries(
            time_series=single_values, is_missing=step_mask),
        single_values[~step_mask])
def pad_batch_dimension_for_multiple_chains(
        observed_time_series, model, chain_batch_shape):
    """Expand the observed time series with extra batch dimension(s).

    Running with multiple chains introduces an extra batch dimension, so in
    general the observed time series must be padded with a matching one.

    For example, suppose the model has batch shape `[3, 4]` and the observed
    time series has shape `concat([[5], [3, 4], [100]])`, corresponding to
    `sample_shape`, `batch_shape`, and `num_timesteps` respectively. The
    model will produce distributions with batch shape
    `concat([chain_batch_shape, [3, 4]])`, so `observed_time_series` is padded
    to shape `[5, 1, 3, 4, 100]`, where the added `1` dimension between the
    sample and batch shapes will broadcast to `chain_batch_shape`.

    Args:
      observed_time_series: any input accepted by
        `canonicalize_observed_time_series_with_mask`.
      model: object providing `batch_shape` (with an `ndims` attribute) and
        `batch_shape_tensor()`.
      chain_batch_shape: value convertible to an `int32` `Tensor`; a scalar
        `k` is expanded to `[k]`.

    Returns:
      A `missing_values_util.MaskedTimeSeries` holding the (possibly padded)
      series and the (possibly padded) mask, or `None` for the mask when the
      canonicalized input has none.

    Raises:
      ValueError: if the static shape of `chain_batch_shape` is not fully
        defined.
    """
    # Extract mask and guarantee `event_ndims=2`.
    observed_time_series, is_missing = (
        canonicalize_observed_time_series_with_mask(observed_time_series))
    event_ndims = 2  # event_shape = [num_timesteps, observation_size=1]

    # Use the static batch rank when it is known, else a dynamic one.
    model_batch_ndims = model.batch_shape.ndims
    if model_batch_ndims is None:
        model_batch_ndims = tf.shape(model.batch_shape_tensor())[0]

    # Compute ndims from chain_batch_shape.
    chain_batch_shape = tf.convert_to_tensor(
        value=chain_batch_shape, name='chain_batch_shape', dtype=tf.int32)
    if not chain_batch_shape.shape.is_fully_defined():
        raise ValueError(
            'Batch shape must have static rank. (given: {})'.format(
                chain_batch_shape))
    if chain_batch_shape.shape.ndims == 0:  # expand int `k` to `[k]`.
        chain_batch_shape = chain_batch_shape[tf.newaxis]
    chain_batch_ndims = tf.compat.dimension_value(chain_batch_shape.shape[0])

    # Number of trailing (model batch + event) dims that stay in place.
    num_trailing_dims = model_batch_ndims + event_ndims

    def insert_chain_dims(series):
        # Splice `chain_batch_ndims` size-1 dims between the sample shape
        # and the batch/event shape.
        full_shape = tf.shape(series)
        padded_shape = tf.concat(
            [full_shape[:-num_trailing_dims],
             tf.ones([chain_batch_ndims], dtype=tf.int32),
             full_shape[-num_trailing_dims:]],
            axis=0)
        return tf.reshape(tensor=series, shape=padded_shape)

    def pad_if_has_sample_shape(series):
        # Padding is only needed if the series has sample shape.
        return ps.cond(ps.rank(series) > num_trailing_dims,
                       lambda: insert_chain_dims(series),
                       lambda: series)

    observed_time_series = pad_if_has_sample_shape(observed_time_series)
    if is_missing is not None:
        is_missing = pad_if_has_sample_shape(is_missing)
    return missing_values_util.MaskedTimeSeries(observed_time_series,
                                                is_missing=is_missing)
def canonicalize_observed_time_series_with_mask(
        maybe_masked_observed_time_series):
    """Extract a Tensor with canonical shape and optional mask.

    Args:
      maybe_masked_observed_time_series: a `Tensor`-like object with shape
        `[..., num_timesteps]` or `[..., num_timesteps, 1]`, or a
        `tfp.sts.MaskedTimeSeries` containing such an object, or a Pandas
        Series or DataFrame instance with set frequency (i.e.,
        `.index.freq is not None`).

    Returns:
      masked_time_series: a `tfp.sts.MaskedTimeSeries` namedtuple, in which
        the series is converted to `Tensor` with canonical shape
        `[..., num_timesteps, 1]`, and `is_missing` is either `None` or a
        boolean `Tensor`.

    Raises:
      ValueError: if a Pandas-like input has an index whose `freq`
        attribute is `None`.
    """
    series_input = maybe_masked_observed_time_series
    with tf.name_scope('canonicalize_observed_time_series_with_mask'):
        is_missing_is_specified = hasattr(series_input, 'is_missing')

        if is_missing_is_specified:
            # A `MaskedTimeSeries`-like input carries its own mask.
            raw_series = series_input.time_series
            is_missing = series_input.is_missing
        elif (hasattr(series_input, 'index')
              and hasattr(series_input, 'to_numpy')):
            # Duck-typed Pandas Series or DataFrame.
            series_index = series_input.index
            if hasattr(series_index, 'freq') and series_index.freq is None:
                raise ValueError(
                    'Pandas DataFrame or Series has a DatetimeIndex with no '
                    'set frequency, but STS requires regularly spaced '
                    'observations. Consider using '
                    '`tfp.sts.regularize_series` to infer a frequency and '
                    'build a regularly spaced series (by marking unobserved '
                    'steps as missing observations).')
            # When a DataFrame has multiple columns representing a batch of
            # series, we want shape `[batch_size, num_steps]` rather than
            # vice versa.
            raw_series = np.squeeze(np.transpose(series_input.to_numpy()))
        else:
            raw_series = series_input

        observed_time_series = _maybe_expand_trailing_dim(
            tf.convert_to_tensor(value=raw_series,
                                 name='observed_time_series'))

        if not is_missing_is_specified:
            # Treat `NaN` values as missing, but drop the mask entirely when
            # it is statically known that no value is `NaN`.
            is_missing = tf.math.is_nan(observed_time_series[..., 0])
            is_missing_static = tf.get_static_value(is_missing)
            if (is_missing_static is not None
                    and not np.any(is_missing_static)):
                is_missing = None

        if is_missing is not None:
            is_missing = tf.convert_to_tensor(
                value=is_missing, name='is_missing', dtype_hint=tf.bool)

        return missing_values_util.MaskedTimeSeries(observed_time_series,
                                                    is_missing=is_missing)
def canonicalize_observed_time_series_with_mask(
        maybe_masked_observed_time_series):
    """Extract a Tensor with canonical shape and optional mask.

    Args:
      maybe_masked_observed_time_series: a `Tensor`-like object with shape
        `[..., num_timesteps]` or `[..., num_timesteps, 1]`, or a
        `tfp.sts.MaskedTimeSeries` containing such an object.

    Returns:
      masked_time_series: a `tfp.sts.MaskedTimeSeries` namedtuple, in which
        the series is converted to `Tensor` with canonical shape
        `[..., num_timesteps, 1]`, and `is_missing` is either `None` or a
        boolean `Tensor`.
    """
    series_input = maybe_masked_observed_time_series
    with tf.name_scope('canonicalize_observed_time_series_with_mask'):
        if not hasattr(series_input, 'is_missing'):
            # Plain tensor-like input: there is no mask to carry along.
            raw_series, raw_mask = series_input, None
        else:
            raw_series = series_input.time_series
            raw_mask = series_input.is_missing

        canonical_series = _maybe_expand_trailing_dim(
            tf.convert_to_tensor(value=raw_series,
                                 name='observed_time_series'))

        if raw_mask is None:
            canonical_mask = None
        else:
            canonical_mask = tf.convert_to_tensor(
                value=raw_mask, name='is_missing', dtype_hint=tf.bool)

        return missing_values_util.MaskedTimeSeries(
            canonical_series, is_missing=canonical_mask)
def test_empirical_statistics_accepts_masked_values(self):
    """Checks masked statistics, including a fully masked batch member."""
    # The rank-2 mask is paired with a rank-3 batch of series so that it
    # must broadcast over the leading batch dimension.
    batched_values = np.random.randn(3, 2, 5)
    batched_mask = np.array([[True, False, False, True, False],
                             [True, True, True, True, True]])
    mean, stddev, initial = self.evaluate(
        sts_util.empirical_statistics(
            missing_values_util.MaskedTimeSeries(
                time_series=batched_values, is_missing=batched_mask)))

    # Should return default values when the series is completely masked.
    self.assertAllClose(mean[:, 1], tf.zeros_like(mean[:, 1]))
    self.assertAllClose(stddev[:, 1], tf.ones_like(stddev[:, 1]))
    self.assertAllClose(initial[:, 1], tf.zeros_like(initial[:, 1]))

    # Otherwise, should return the actual mean/stddev/initial values.
    partial_values = batched_values[:, 0, :]
    partial_mask = batched_mask[0, :]
    kept_steps = ~np.broadcast_to(partial_mask, partial_values.shape)
    want_mean, want_stddev, want_initial = self.evaluate(
        sts_util.empirical_statistics(
            partial_values[kept_steps].reshape([3, 3])))
    self.assertAllClose(mean[:, 0], want_mean)
    self.assertAllClose(stddev[:, 0], want_stddev)
    self.assertAllClose(initial[:, 0], want_initial)

    # Run the same tests without batch shape.
    single_values = partial_values[0, :]
    mean, stddev, initial = self.evaluate(
        sts_util.empirical_statistics(
            missing_values_util.MaskedTimeSeries(
                time_series=single_values, is_missing=partial_mask)))
    want_mean, want_stddev, want_initial = self.evaluate(
        sts_util.empirical_statistics(single_values[~partial_mask]))
    self.assertAllClose(mean, want_mean)
    self.assertAllClose(stddev, want_stddev)
    self.assertAllClose(initial, want_initial)