def test_static_values(self):
    # type checks
    for o in [object(), 1.2, LONG_MAX]:
        with pytest.raises(TypeError,
                           match='xyz cannot be converted to int32'):
            _ = validate_n_samples(o, 'xyz')

    # value checks
    self.assertIsNone(validate_n_samples(None, 'xyz'))
    self.assertEqual(validate_n_samples(1, 'xyz'), 1)
    with pytest.raises(ValueError, match='xyz must be positive'):
        _ = validate_n_samples(0, 'xyz')
    with pytest.raises(ValueError, match='xyz must be positive'):
        _ = validate_n_samples(-1, 'xyz')
def get_score(self, x, y=None, n_z=None, mcmc_iteration=None,
              last_point_only=True):
    """
    Get the reconstruction probability for `x` and `y`.

    The larger the reconstruction probability, the less likely a point
    is anomalous.  Take the negative of the score if you want a value
    that directly indicates the severity of the anomaly.

    Args:
        x (tf.Tensor): 2-D `float32` :class:`tf.Tensor`, the windows of
            KPI observations in a mini-batch.
        y (tf.Tensor): 2-D `int32` :class:`tf.Tensor`, the windows of
            missing point indicators in a mini-batch.
        n_z (int or None): Number of `z` samples to take for each `x`.
            (default :obj:`None`, one sample without explicit sampling
            dimension)
        mcmc_iteration (int or tf.Tensor): Iteration count for MCMC
            missing data imputation. (default :obj:`None`, no iteration)
        last_point_only (bool): Whether to obtain the reconstruction
            probability of only the last point in each window.
            (default :obj:`True`)

    Returns:
        tf.Tensor: The reconstruction probability, with the shape
            ``(len(x) - self.x_dims + 1,)`` if `last_point_only` is
            :obj:`True`, or ``(len(x) - self.x_dims + 1, self.x_dims)``
            if `last_point_only` is :obj:`False`.  This is because the
            first ``self.x_dims - 1`` points are not the last point of
            any window.
    """
    with tf.name_scope('Donut.get_score'):
        # MCMC missing data imputation
        if y is not None and mcmc_iteration:
            x_r = iterative_masked_reconstruct(
                reconstruct=self.vae.reconstruct,
                x=x,
                mask=y,
                iter_count=mcmc_iteration,
                back_prop=False,
            )
        else:
            x_r = x

        # get the reconstruction probability
        q_net = self.vae.variational(x=x_r, n_z=n_z)  # notice: x=x_r
        p_net = self.vae.model(z=q_net['z'], x=x, n_z=n_z)  # notice: x=x
        r_prob = p_net['x'].log_prob(group_ndims=0)
        if n_z is not None:
            n_z = validate_n_samples(n_z, 'n_z')
            assert_shape_op = tf.assert_equal(
                tf.shape(r_prob),
                tf.stack([n_z, tf.shape(x)[0], self.x_dims]),
                message='Unexpected shape of reconstruction prob'
            )
            with tf.control_dependencies([assert_shape_op]):
                r_prob = tf.reduce_mean(r_prob, axis=0)
        if last_point_only:
            r_prob = r_prob[:, -1]
        return r_prob
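A typical evaluation graph only needs a pair of placeholders and a single call to this method. The following is a minimal sketch, not a prescribed recipe: `model` stands for an already-constructed and trained `Donut` instance, `test_windows` / `test_missing` are arrays assumed to be prepared elsewhere, and the `n_z` / `mcmc_iteration` values are illustrative choices, not library defaults.

import tensorflow as tf

# `model` is assumed to be a trained Donut instance built elsewhere.
input_x = tf.placeholder(tf.float32, shape=(None, model.x_dims),
                         name='input_x')
input_y = tf.placeholder(tf.int32, shape=(None, model.x_dims),
                         name='input_y')

# illustrative sampling/imputation settings, not library defaults
score = model.get_score(x=input_x, y=input_y, n_z=1024, mcmc_iteration=10)

with tf.Session() as sess:
    # in practice the trained variables would be restored here,
    # e.g. with tf.train.Saver().restore(...)
    sess.run(tf.global_variables_initializer())
    scores = sess.run(score, feed_dict={input_x: test_windows,
                                        input_y: test_missing})
    anomaly_severity = -scores  # larger value => more anomalous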
def test_dynamic_values(self):
    # type checks
    for o in [tf.constant(1.2, dtype=tf.float32),
              tf.constant(LONG_MAX, dtype=tf.int64)]:
        with pytest.raises(TypeError,
                           match='xyz cannot be converted to int32'):
            _ = validate_n_samples(o, 'xyz')

    # value checks
    with self.test_session():
        self.assertEqual(
            validate_n_samples(tf.constant(1, dtype=tf.int32), 'xyz').eval(),
            1
        )
        with pytest.raises(Exception, match='xyz must be positive'):
            _ = validate_n_samples(
                tf.constant(0, dtype=tf.int32), 'xyz').eval()
        with pytest.raises(Exception, match='xyz must be positive'):
            _ = validate_n_samples(
                tf.constant(-1, dtype=tf.int32), 'xyz').eval()
def get_training_loss(self, x, n_z=None):
    """
    Get the training loss for this VAE.

    The variational solver is automatically chosen according to
    `z.is_reparameterized` and the argument `n_z`, by the following rules:

    1. If `z.is_reparameterized` is :obj:`True`, then:

        1. If `n_z` > 1, use `iwae`.
        2. If `n_z` == 1 or `n_z` is :obj:`None`, use `sgvb`.

    2. If `z.is_reparameterized` is :obj:`False`, then:

        1. If `n_z` > 1, use `vimco`.
        2. If `n_z` == 1 or `n_z` is :obj:`None`, use `reinforce`.

    Dynamic `n_z` is not supported by this method.  Also, the Reweighted
    Wake-Sleep algorithm is not a choice of this method.  To derive the
    training loss for either situation, use :meth:`chain` to obtain a
    :class:`~tfsnippet.variational.VariationalChain`, and further obtain
    the loss by `chain.vi.training.[algorithm]`.

    Args:
        x: The input observation `x`.
        n_z (int or None): Number of `z` samples to take.  Must be
            :obj:`None` or a constant integer.  Dynamic tensors are not
            accepted, since we cannot automatically choose a variational
            solver for a non-deterministic `n_z`. (default :obj:`None`)

    Returns:
        tf.Tensor: A 0-d tensor, the training loss, which can be
            optimized by gradient descent.

    See Also:
        :class:`tfsnippet.variational.VariationalChain`,
        :class:`tfsnippet.variational.VariationalTrainingObjectives`
    """
    with tf.name_scope('VAE.get_training_loss'):
        if n_z is not None:
            if is_tensor_object(n_z):
                raise TypeError('Cannot choose the variational solver '
                                'automatically for dynamic `n_z`')
            n_z = validate_n_samples(n_z, 'n_z')

        # derive the variational chain
        chain = self.chain(x, n_z)
        z = chain.variational['z']

        # auto choose a variational solver for training loss
        if n_z is not None and n_z > 1:
            if z.is_reparameterized:
                solver = chain.vi.training.iwae
            else:
                solver = chain.vi.training.vimco
        else:
            if z.is_reparameterized:
                solver = chain.vi.training.sgvb
            else:
                solver = chain.vi.training.reinforce

        # derive the training loss
        return tf.reduce_mean(solver())
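As a concrete illustration of the solver-selection rules, the sketch below trains with `n_z=8`: since `n_z > 1`, a reparameterized `z` makes the method pick `iwae`, while `n_z=None` would fall back to `sgvb`. Everything besides `get_training_loss` itself (`vae`, `x_dims`, `training_batches`) is an assumed placeholder for the example.

import tensorflow as tf

x_dims = 120  # illustrative window size

# `vae` is assumed to be an already-constructed VAE instance.
input_x = tf.placeholder(tf.float32, shape=(None, x_dims), name='input_x')

# n_z=8 > 1: selects `iwae` for a reparameterized z, `vimco` otherwise
loss = vae.get_training_loss(input_x, n_z=8)
train_op = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for batch in training_batches:  # assumed mini-batch iterator
        _, loss_val = sess.run([train_op, loss],
                               feed_dict={input_x: batch})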
def get_refactor_probability(self, window, missing=None, n_z=None,
                             mcmc_iteration=None, last_point_only=True):
    """
    Get the reconstruction probability for `window` and `missing`.

    The larger the reconstruction probability, the less likely a point
    is anomalous.  Take the negative of this score if you want a value
    that directly indicates the severity of the anomaly.

    Args:
        window (tf.Tensor): 2-D `float32` :class:`tf.Tensor`, the sliding
            windows of KPI observations in a mini-batch.
        missing (tf.Tensor): 2-D `int32` :class:`tf.Tensor`, the windows
            of missing point indicators for each slice.
        n_z (int or None): Number of `z` samples to take for each `x`.
            (default :obj:`None`, one sample without explicit sampling
            dimension)
        mcmc_iteration (int or tf.Tensor): Iteration count for MCMC
            missing data imputation. (default :obj:`None`, no iteration)
        last_point_only (bool): Whether to obtain the reconstruction
            probability of only the last point in each window.
            (default :obj:`True`)

    Returns:
        tf.Tensor: The reconstruction probability, with the shape
            ``(len(x) - self.x_dims + 1,)`` if `last_point_only` is
            :obj:`True`, or ``(len(x) - self.x_dims + 1, self.x_dims)``
            otherwise.  This is because the first ``self.x_dims - 1``
            points are not the last point of any window.
    """
    with tf.name_scope('Donut.get_refactor_probability'):
        # MCMC missing data imputation:
        # run iterative reconstruction if there are missing points
        # and an iteration count was given
        if missing is not None and mcmc_iteration:
            x_r = iterative_masked_reconstruct(
                reconstruct=self.vae.reconstruct,
                x=window,
                mask=missing,
                iter_count=mcmc_iteration,
                back_prop=False,
            )
        # otherwise use the original data
        else:
            x_r = window

        # get the reconstruction probability:
        # derive a :math:`q(z|h(x))` instance, the variational net;
        # if `z` is not observed, `n_z` samples of `z` are taken per `x`
        q_net = self.vae.variational(x=x_r, n_z=n_z)  # notice: x=x_r
        # derive a :math:`p(x|h(z))` instance, the model net
        p_net = self.vae.model(z=q_net['z'], x=window, n_z=n_z)  # notice: x=x
        # compute the log-density of the :class:`StochasticTensor`,
        # overriding the configured `group_ndims` with 0
        r_prob = p_net['x'].log_prob(group_ndims=0)

        # if the number of `z` samples per `x` was given
        if n_z is not None:
            # validate the `n_samples` argument
            n_z = validate_n_samples(n_z, 'n_z')
            assert_shape_op = tf.assert_equal(
                tf.shape(r_prob),
                tf.stack([n_z, tf.shape(window)[0], self.x_dims]),
                message='Unexpected shape of reconstruction prob')
            # context manager for control dependencies: ops created in
            # this block only run after `assert_shape_op` has executed;
            # a wrapper of `graph.control_dependencies()` on the
            # default graph
            with tf.control_dependencies([assert_shape_op]):
                # average over the sampling dimension: reduce `r_prob`
                # along axis 0, decreasing its rank by 1
                r_prob = tf.reduce_mean(r_prob, axis=0)

        # keep only the reconstruction probability of the last point
        # in each window
        if last_point_only:
            r_prob = r_prob[:, -1]
        return r_prob