def get_issm_coeff(
    self, seasonal_indicators: Tensor  # (batch_size, time_length)
) -> Tuple[Tensor, Tensor, Tensor]:
    F = getF(seasonal_indicators)
    emission_coeff_ls, transition_coeff_ls, innovation_coeff_ls = zip(
        self.nonseasonal_issm.get_issm_coeff(seasonal_indicators),
        *[
            issm.get_issm_coeff(
                seasonal_indicators.slice_axis(axis=-1, begin=ix, end=ix + 1)
            )
            for ix, issm in enumerate(self.seasonal_issms)
        ],
    )

    # stack emission and innovation coefficients
    emission_coeff = F.concat(*emission_coeff_ls, dim=-1)
    innovation_coeff = F.concat(*innovation_coeff_ls, dim=-1)

    # transition coefficient is block diagonal!
    transition_coeff = _make_block_diagonal(transition_coeff_ls)

    return emission_coeff, transition_coeff, innovation_coeff
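In matrix form, the composite ISSM stacks the per-component coefficients as follows (the notation is ours, chosen to mirror the `concat` and block-diagonal calls above; component 0 is the non-seasonal ISSM):

$$
a_t = \begin{bmatrix} a_t^{(0)} \\ \vdots \\ a_t^{(K)} \end{bmatrix},
\qquad
g_t = \begin{bmatrix} g_t^{(0)} \\ \vdots \\ g_t^{(K)} \end{bmatrix},
\qquad
F_t = \operatorname{diag}\!\left(F_t^{(0)}, \dots, F_t^{(K)}\right),
$$

where $a_t$, $g_t$, and $F_t$ denote the emission, innovation, and transition coefficients, respectively.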
def lowrank_log_likelihood(
    rank: int, mu: Tensor, D: Tensor, W: Tensor, x: Tensor
) -> Tensor:
    F = getF(mu)
    dim = F.ones_like(mu).sum(axis=-1).max()

    dim_factor = dim * math.log(2 * math.pi)

    batch_capacitance_tril = capacitance_tril(F=F, rank=rank, W=W, D=D)

    log_det_factor = log_det(
        F=F, batch_D=D, batch_capacitance_tril=batch_capacitance_tril
    )

    mahalanobis_factor = mahalanobis_distance(
        F=F, W=W, D=D, capacitance_tril=batch_capacitance_tril, x=x - mu
    )

    ll: Tensor = -0.5 * (
        F.broadcast_add(dim_factor, log_det_factor) + mahalanobis_factor
    )

    return ll
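For reference, the three terms combined here implement the log-density of a multivariate Gaussian with low-rank-plus-diagonal covariance. Assuming, as the helper names suggest, that `capacitance_tril` returns the Cholesky factor $L_C$ of the capacitance matrix, the relevant identities are the matrix determinant lemma and the Woodbury formula:

$$
\Sigma = D + W W^\top, \qquad C = I_r + W^\top D^{-1} W = L_C L_C^\top,
$$
$$
\log \det \Sigma = \log \det D + \log \det C,
$$
$$
(x-\mu)^\top \Sigma^{-1} (x-\mu)
= (x-\mu)^\top D^{-1} (x-\mu)
- \left\lVert L_C^{-1} W^\top D^{-1} (x-\mu) \right\rVert^2,
$$

so that $\log p(x) = -\tfrac{1}{2}\left(d \log 2\pi + \log\det\Sigma + (x-\mu)^\top \Sigma^{-1} (x-\mu)\right)$, which is exactly the sum of `dim_factor`, `log_det_factor`, and `mahalanobis_factor`.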
def __init__(
    self, mu: Tensor, L: Tensor, F=None, float_type: DType = np.float32
) -> None:
    self.mu = mu
    self.F = F if F else getF(mu)
    self.L = L
    self.float_type = float_type
def s(mu: Tensor, D: Tensor, W: Tensor) -> Tensor:
    F = getF(mu)

    samples_D = F.sample_normal(
        mu=F.zeros_like(mu), sigma=F.ones_like(mu), dtype=dtype
    )
    cov_D = D.sqrt() * samples_D

    # dummy tensor of shape (..., rank), only used to get the right shape
    # for samples_W
    dummy_tensor = F.linalg_gemm2(
        W, mu.expand_dims(axis=-1), transpose_a=True
    ).squeeze(axis=-1)

    samples_W = F.sample_normal(
        mu=F.zeros_like(dummy_tensor),
        sigma=F.ones_like(dummy_tensor),
        dtype=dtype,
    )

    cov_W = F.linalg_gemm2(W, samples_W.expand_dims(axis=-1)).squeeze(
        axis=-1
    )

    samples = mu + cov_D + cov_W

    return samples
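The sampler draws $x = \mu + D^{1/2}\varepsilon_D + W\varepsilon_W$ with standard-normal $\varepsilon_D, \varepsilon_W$, which has covariance $D + WW^\top$ (`dtype` is captured from the enclosing scope). A self-contained NumPy check of the same trick, with illustrative names and shapes that are not part of GluonTS:

import numpy as np

rng = np.random.default_rng(0)
dim, rank, n = 3, 2, 200_000

mu = rng.normal(size=dim)
D = rng.uniform(0.5, 1.5, size=dim)    # diagonal part of the covariance
W = rng.normal(size=(dim, rank))       # low-rank factor

eps_D = rng.standard_normal((n, dim))  # plays the role of samples_D
eps_W = rng.standard_normal((n, rank)) # plays the role of samples_W

# x = mu + sqrt(D) * eps_D + eps_W @ W.T has covariance D + W W^T
x = mu + np.sqrt(D) * eps_D + eps_W @ W.T

emp_cov = np.cov(x, rowvar=False)
print(np.allclose(emp_cov, np.diag(D) + W @ W.T, atol=0.05))  # typically True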
def __init__(
    self, alpha: Tensor, F=None, float_type: DType = np.float32
) -> None:
    self.alpha = alpha
    self.F = F if F else getF(alpha)
    self.float_type = float_type
def __init__(
    self, dim: int, rank: int, mu: Tensor, D: Tensor, W: Tensor
) -> None:
    self.dim = dim
    self.rank = rank
    self.mu = mu
    self.D = D
    self.W = W
    self.F = getF(mu)
    self.Cov = None
def s(alpha: Tensor) -> Tensor:
    F = getF(alpha)
    samples_gamma = F.sample_gamma(
        alpha=alpha, beta=F.ones_like(alpha), dtype=dtype
    )
    sum_gamma = F.sum(samples_gamma, axis=-1, keepdims=True)
    samples_s = F.broadcast_div(samples_gamma, sum_gamma)

    return samples_s
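This relies on the standard Gamma-normalization construction of the Dirichlet: if $G_i \sim \mathrm{Gamma}(\alpha_i, 1)$ independently, then $(G_1, \dots, G_k) / \sum_i G_i \sim \mathrm{Dirichlet}(\alpha)$ (`dtype` is captured from the enclosing scope). A quick NumPy check, illustrative rather than GluonTS code:

import numpy as np

rng = np.random.default_rng(0)
alpha = np.array([2.0, 3.0, 5.0])

g = rng.gamma(shape=alpha, scale=1.0, size=(100_000, 3))
s = g / g.sum(axis=-1, keepdims=True)

print(s.mean(axis=0))  # close to alpha / alpha.sum() = [0.2, 0.3, 0.5]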
def s(alpha: Tensor) -> Tensor:
    F = getF(alpha)
    samples_gamma = F.sample_gamma(
        alpha=alpha, beta=F.ones_like(alpha), dtype=dtype
    )
    sum_gamma = F.sum(samples_gamma, axis=-1, keepdims=True)
    samples_s = F.broadcast_div(samples_gamma, sum_gamma)

    cat_samples = F.sample_multinomial(samples_s, shape=n_trials)
    return F.sum(F.one_hot(cat_samples, dim), axis=-2)
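This is the standard two-stage (compound) sampling scheme for the Dirichlet-multinomial: draw class probabilities from a Dirichlet, then counts from a Multinomial. Summing one-hot categorical draws over the trials axis, as above, yields the same counts (`dim` and `n_trials` are captured from the enclosing scope). An equivalent NumPy sketch, for illustration only:

import numpy as np

rng = np.random.default_rng(0)
alpha, n_trials = np.array([1.0, 2.0, 3.0]), 10

p = rng.dirichlet(alpha)               # stage 1: class probabilities
counts = rng.multinomial(n_trials, p)  # stage 2: counts over the classes
print(counts, counts.sum())            # the counts sum to n_trials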
def __init__(
    self, gamma: Tensor, slopes: Tensor, knot_spacings: Tensor, F=None
) -> None:
    self.F = F if F else getF(gamma)
    self.gamma = gamma

    # Since most of the calculations are easily expressed in the original
    # parameters, we transform the learned parameters back
    self.b, self.knot_positions = PiecewiseLinear._to_orig_params(
        self.F, slopes, knot_spacings
    )
def crps(self, y: Tensor) -> Tensor:
    # TODO: use event_shape
    F = getF(y)
    x = y
    scale = 1.0
    for t in self.transforms[::-1]:
        assert isinstance(
            t, AffineTransformation
        ), "Not an AffineTransformation"
        x = t.f_inv(x)
        scale *= t.scale
    p = self.base_distribution.crps(x)
    return F.broadcast_mul(p, scale)
def __init__(
    self,
    dim: int,
    n_trials: int,
    alpha: Tensor,
    F=None,
    float_type: DType = np.float32,
) -> None:
    self.dim = dim
    self.n_trials = n_trials
    self.alpha = alpha
    self.F = F if F else getF(alpha)
    self.float_type = float_type
def distribution(
    self,
    feat_static_cat: Tensor,
    feat_static_real: Tensor,
    past_time_feat: Tensor,
    past_target: Tensor,
    past_observed_values: Tensor,
    future_time_feat: Tensor,
    future_target: Tensor,
    future_observed_values: Tensor,
) -> Tuple[Distribution, Distribution]:
    """
    Returns the distributions predicted by the model on the range of
    past_target and future_target.

    The distributions are obtained by unrolling the network with the true
    target; these are also the distributions whose loss is minimized during
    training. This can be used in anomaly detection, see for instance
    examples/anomaly_detection.py.

    Input arguments are the same as for the hybrid_forward method.

    Returns
    -------
    Tuple[Distribution, Distribution]
        two distribution objects whose means have shape:
        (batch_size, context_length + prediction_length).
    """
    # unroll the decoder in "training mode",
    # i.e. by providing future data as well
    F = getF(feat_static_cat)
    rnn_outputs, _, scale, _ = self.unroll_encoder(
        F=F,
        feat_static_cat=feat_static_cat,
        feat_static_real=feat_static_real,
        past_time_feat=past_time_feat,
        past_target=past_target,
        past_observed_values=past_observed_values,
        future_time_feat=future_time_feat,
        future_target=future_target,
    )

    distr_args_m = self.proj_distr_args_m(rnn_outputs)
    distr_args_q = self.proj_distr_args_q(rnn_outputs)

    return (
        self.distr_output_m.distribution(distr_args_m, scale=scale),
        self.distr_output_q.distribution(distr_args_q, scale=scale),
    )
def distribution(
    self,
    feat_static_cat: Tensor,
    past_time_feat: Tensor,
    past_target: Tensor,
    past_observed_values: Tensor,
    future_time_feat: Tensor,
    future_target: Tensor,
    future_observed_values: Tensor,
) -> Distribution:
    """
    Returns the distribution predicted by the model on the range of
    past_target and future_target.

    The distribution is obtained by unrolling the network with the true
    target; this is also the distribution whose loss is minimized during
    training. This can be used in anomaly detection, see for instance
    examples/anomaly_detection.py.

    Parameters
    ----------
    feat_static_cat
    past_time_feat
    past_target
    past_observed_values
    future_time_feat
    future_target
    future_observed_values

    Returns
    -------
    Distribution
        a distribution object whose mean has shape:
        (batch_size, context_length + prediction_length).
    """
    # unroll the decoder in "training mode",
    # i.e. by providing future data as well
    F = getF(feat_static_cat)
    rnn_outputs, _, scale, _ = self.unroll_encoder(
        F=F,
        feat_static_cat=feat_static_cat,
        past_time_feat=past_time_feat,
        past_target=past_target,
        past_observed_values=past_observed_values,
        future_time_feat=future_time_feat,
        future_target=future_target,
    )

    distr_args = self.proj_distr_args(rnn_outputs)

    return self.distr_output.distribution(distr_args, scale=scale)
def __init__(
    self,
    emission_coeff: Tensor,
    transition_coeff: Tensor,
    innovation_coeff: Tensor,
    noise_std: Tensor,
    residuals: Tensor,
    prior_mean: Tensor,
    prior_cov: Tensor,
    latent_dim: int,
    output_dim: int,
    seq_length: int,
    F=None,
) -> None:
    self.latent_dim = latent_dim
    self.output_dim = output_dim
    self.seq_length = seq_length

    # Split coefficients along the time axis for easy access

    # emission_coeff[t]: (batch_size, obs_dim, latent_dim)
    self.emission_coeff = emission_coeff.split(
        axis=1, num_outputs=self.seq_length, squeeze_axis=True
    )

    # innovation_coeff[t]: (batch_size, latent_dim)
    self.innovation_coeff = innovation_coeff.split(
        axis=1, num_outputs=self.seq_length, squeeze_axis=False
    )

    # transition_coeff[t]: (batch_size, latent_dim, latent_dim)
    self.transition_coeff = transition_coeff.split(
        axis=1, num_outputs=self.seq_length, squeeze_axis=True
    )

    # noise_std[t]: (batch_size, obs_dim)
    self.noise_std = noise_std.split(
        axis=1, num_outputs=self.seq_length, squeeze_axis=True
    )

    # residuals[t]: (batch_size, obs_dim)
    self.residuals = residuals.split(
        axis=1, num_outputs=self.seq_length, squeeze_axis=True
    )

    self.prior_mean = prior_mean
    self.prior_cov = prior_cov
    self.F = F if F else getF(noise_std)
def _make_block_diagonal(blocks: List[Tensor]) -> Tensor:
    assert (
        len(blocks) > 0
    ), "You need at least one tensor to make a block-diagonal tensor"

    if len(blocks) == 1:
        return blocks[0]

    F = getF(blocks[0])

    # combine the blocks pairwise, left to right:
    # ((blocks[0] + blocks[1]) + blocks[2]) + ...
    block_diagonal = _make_2_block_diagonal(F, blocks[0], blocks[1])
    for i in range(2, len(blocks)):
        block_diagonal = _make_2_block_diagonal(
            F=F, left=block_diagonal, right=blocks[i]
        )

    return block_diagonal
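The fold computes the usual block-diagonal matrix: for square blocks $A, B, C$ the result is $\operatorname{diag}(A, B, C)$, with zeros off the blocks. A SciPy illustration of the expected result on plain 2-D arrays (the implementation above additionally handles the leading batch and time axes of MXNet tensors):

import numpy as np
from scipy.linalg import block_diag

A, B, C = np.eye(2), 2 * np.eye(1), 3 * np.eye(3)
M = block_diag(A, B, C)
print(M.shape)  # (6, 6); A, B, C sit on the diagonal, zeros elsewhere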
def crps(self, y: Tensor) -> Tensor:
    # TODO: use event_shape
    F = getF(y)
    x = y
    scale = 1.0
    for t in self.transforms[::-1]:
        assert isinstance(
            t, AffineTransformation
        ), "Not an AffineTransformation"
        assert (
            t.scale is not None and t.loc is None
        ), "Not a scaling transformation"
        x = t.f_inv(x)  # (..., 1)
        scale *= t.scale
    p = self.base_distribution.crps(x)
    return F.broadcast_mul(p, scale)
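Both `crps` implementations above rely on the positive homogeneity of the CRPS: for a scaling transformation with scale $s > 0$,

$$
\mathrm{CRPS}_{sX}(y)
= \int_{-\infty}^{\infty} \left( F_{sX}(z) - \mathbf{1}\{z \ge y\} \right)^2 \mathrm{d}z
= s \cdot \mathrm{CRPS}_{X}(y / s),
$$

which is exactly `F.broadcast_mul(p, scale)` applied to the CRPS of the base distribution evaluated at the back-transformed point `x`.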
def emission_coeff(
    self, seasonal_indicators: Tensor  # (batch_size, time_length)
) -> Tensor:
    F = getF(seasonal_indicators)

    _emission_coeff = F.ones(shape=(1, 1, 1, self.latent_dim()))

    # get the right shape: (batch_size, seq_length, obs_dim, latent_dim)
    zeros = _broadcast_param(
        F.zeros_like(
            seasonal_indicators.slice_axis(
                axis=-1, begin=0, end=1
            ).squeeze(axis=-1)
        ),
        axes=[2, 3],
        sizes=[1, self.latent_dim()],
    )

    return _emission_coeff.broadcast_like(zeros)
def __init__(self, amplitude: Tensor, length_scale: Tensor, F=None) -> None:
    """
    Parameters
    ----------
    amplitude : Tensor
        RBF kernel amplitude hyper-parameter of shape (batch_size, 1, 1).
    length_scale : Tensor
        RBF kernel length scale hyper-parameter of shape (batch_size, 1, 1).
    F : ModuleType
        A module that can either refer to the Symbol API or the NDArray API
        in MXNet.
    """
    self.F = F if F else getF(amplitude)
    self.amplitude = amplitude
    self.length_scale = length_scale
def transition_coeff(
    self, seasonal_indicators: Tensor  # (batch_size, time_length)
) -> Tensor:
    F = getF(seasonal_indicators)

    _transition_coeff = (
        F.eye(self.latent_dim()).expand_dims(axis=0).expand_dims(axis=0)
    )

    # get the right shape: (batch_size, seq_length, latent_dim, latent_dim)
    zeros = _broadcast_param(
        F.zeros_like(
            seasonal_indicators.slice_axis(
                axis=-1, begin=0, end=1
            ).squeeze(axis=-1)
        ),
        axes=[2, 3],
        sizes=[self.latent_dim(), self.latent_dim()],
    )

    return _transition_coeff.broadcast_like(zeros)
def distr(
    self,
    rnn_outputs: Tensor,
    time_features: Tensor,
    scale: Tensor,
    lags_scaled: Tensor,
    target_dimension_indicator: Tensor,
    seq_len: int,
):
    """
    Returns the distribution of GPVAR with respect to the RNN outputs.

    Parameters
    ----------
    rnn_outputs
        Outputs of the unrolled RNN (batch_size, seq_len, num_cells)
    time_features
        Dynamic time features (batch_size, seq_len, num_features)
    scale
        Mean scale for each time series (batch_size, 1, target_dim)
    lags_scaled
        Scaled lags used for RNN input
        (batch_size, seq_len, target_dim, num_lags)
    target_dimension_indicator
        Indices of the target dimension (batch_size, target_dim)
    seq_len
        Length of the sequences

    Returns
    -------
    distr
        Distribution instance
    distr_args
        Distribution arguments
    """
    F = getF(rnn_outputs)

    # (batch_size, target_dim, embed_dim)
    index_embeddings = self.embed(target_dimension_indicator)

    # broadcast to (batch_size, seq_len, target_dim, embed_dim)
    repeated_index_embeddings = index_embeddings.expand_dims(
        axis=1
    ).repeat(axis=1, repeats=seq_len)

    # broadcast to (batch_size, seq_len, target_dim, num_features)
    time_features = time_features.expand_dims(axis=2).repeat(
        axis=2, repeats=self.target_dim_sample
    )

    # (batch_size, seq_len, target_dim, embed_dim + num_cells + num_inputs)
    distr_input = F.concat(
        rnn_outputs, repeated_index_embeddings, time_features, dim=-1
    )

    # TODO 1 pass inputs in proj args
    distr_args = self.proj_dist_args(distr_input)

    # compute likelihood of target given the predicted parameters
    distr = self.distr_output.distribution(
        distr_args, scale=scale, dim=self.target_dim_sample
    )

    return distr, distr_args
def __init__(self, mu: Tensor, L: Tensor, F=None) -> None:
    self.mu = mu
    self.F = F if F else getF(mu)
    self.L = L
def innovation_coeff(self, seasonal_indicators: Tensor) -> Tensor:
    F = getF(seasonal_indicators)
    # seasonal_indicators = F.modulo(seasonal_indicators - 1, self.latent_dim)
    return F.one_hot(
        seasonal_indicators, depth=self.latent_dim()
    ).squeeze(axis=2)
def emission_coeff(self, seasonal_indicators: Tensor) -> Tensor:
    F = getF(seasonal_indicators)
    return F.one_hot(seasonal_indicators, depth=self.latent_dim())
def __init__(
    self,
    sigma: Tensor,
    kernel: Kernel,
    prediction_length: Optional[int] = None,
    context_length: Optional[int] = None,
    num_samples: Optional[int] = None,
    ctx: mx.Context = mx.Context("cpu"),
    float_type: DType = np.float64,
    jitter_method: str = "iter",
    max_iter_jitter: int = 10,
    neg_tol: float = -1e-8,
    diag_weight: float = 1e-6,
    increase_jitter: int = 10,
    sample_noise: bool = True,
    F=None,
) -> None:
    r"""
    Parameters
    ----------
    sigma
        Noise parameter of shape (batch_size, num_data_points, 1), where
        num_data_points is the number of rows in the Cholesky matrix.
    kernel
        Kernel object.
    prediction_length
        Prediction length.
    context_length
        Training length.
    num_samples
        The number of samples to be drawn.
    ctx
        Determines whether to compute on the cpu or gpu.
    float_type
        Determines whether to use single or double precision.
    jitter_method
        Iterative jitter method or eigenvalue decomposition, depending on
        problem size.
    max_iter_jitter
        Maximum number of iterations for jitter to iteratively make the
        matrix positive definite.
    neg_tol
        Parameter in the jitter methods to eliminate matrices with diagonal
        elements smaller than this when checking if a matrix is positive
        definite.
    diag_weight
        Multiple of the mean of the diagonal entries used to initialize the
        jitter.
    increase_jitter
        Multiply the jitter by this amount at each iteration.
    sample_noise
        Boolean to determine whether to add :math:`\sigma^2I` to the
        predictive covariance matrix.
    F
        A module that can either refer to the Symbol API or the NDArray API
        in MXNet.
    """
    assert (
        prediction_length is None or prediction_length > 0
    ), "The value of `prediction_length` should be > 0"
    assert (
        context_length is None or context_length > 0
    ), "The value of `context_length` should be > 0"
    assert (
        num_samples is None or num_samples > 0
    ), "The value of `num_samples` should be > 0"

    self.sigma = sigma
    self.kernel = kernel
    self.prediction_length = prediction_length
    self.context_length = (
        context_length if context_length is not None else prediction_length
    )
    self.num_samples = num_samples
    self.F = F if F else getF(sigma)
    self.ctx = ctx
    self.float_type = float_type
    self.jitter_method = jitter_method
    self.max_iter_jitter = max_iter_jitter
    self.neg_tol = neg_tol
    self.diag_weight = diag_weight
    self.increase_jitter = increase_jitter
    self.sample_noise = sample_noise
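For intuition, the interplay of `diag_weight`, `increase_jitter`, and `max_iter_jitter` in the iterative jitter method corresponds to the following well-known recipe, sketched here in NumPy (illustrative only; the class itself operates on MXNet tensors and additionally applies the `neg_tol` check):

import numpy as np

def jittered_cholesky(K, diag_weight=1e-6, increase_jitter=10, max_iter_jitter=10):
    # initialize the jitter as a multiple of the mean diagonal entry
    jitter = diag_weight * np.mean(np.diag(K))
    for _ in range(max_iter_jitter):
        try:
            return np.linalg.cholesky(K + jitter * np.eye(K.shape[0]))
        except np.linalg.LinAlgError:
            jitter *= increase_jitter  # not PD yet: increase and retry
    raise np.linalg.LinAlgError("matrix is not positive definite")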