Example #1
    def test_static_values(self):
        # type checks
        for o in [object(), 1.2, LONG_MAX]:
            with pytest.raises(TypeError,
                               match='xyz cannot be converted to int32'):
                _ = validate_n_samples(o, 'xyz')

        # value checks
        self.assertIsNone(validate_n_samples(None, 'xyz'))
        self.assertEqual(validate_n_samples(1, 'xyz'), 1)
        with pytest.raises(ValueError, match='xyz must be positive'):
            _ = validate_n_samples(0, 'xyz')
        with pytest.raises(ValueError, match='xyz must be positive'):
            _ = validate_n_samples(-1, 'xyz')
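
These static checks pin down the contract of `validate_n_samples`: `None` passes through, a positive in-range `int` is returned unchanged, values not convertible to `int32` raise :exc:`TypeError`, and non-positive values raise :exc:`ValueError`. A minimal sketch that satisfies the static cases might look like the following (the real tfsnippet implementation also handles dynamic tensors; this version, including the `LONG_MAX` constant, is an assumption for illustration only):

    import numpy as np

    LONG_MAX = 2 ** 63 - 1  # assumed to match the constant used in the tests

    def validate_n_samples(value, name):
        """Illustrative sketch of the static branch of `validate_n_samples`."""
        if value is None:
            return None
        # reject non-integers and integers outside the int32 range
        if not isinstance(value, int) or \
                not (np.iinfo(np.int32).min <= value <= np.iinfo(np.int32).max):
            raise TypeError('{} cannot be converted to int32'.format(name))
        # reject zero and negative sample counts
        if value <= 0:
            raise ValueError('{} must be positive'.format(name))
        return value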
Example #2
    def get_score(self, x, y=None, n_z=None, mcmc_iteration=None,
                  last_point_only=True):
        """
        Get the reconstruction probability for `x` and `y`.

        The larger the reconstruction probability, the less likely a point
        is to be an anomaly.  You may take the negative of the score if you
        want something that directly indicates the severity of the anomaly.

        Args:
            x (tf.Tensor): 2-D `float32` :class:`tf.Tensor`, the windows of
                KPI observations in a mini-batch.
            y (tf.Tensor): 2-D `int32` :class:`tf.Tensor`, the windows of
                missing point indicators in a mini-batch.
            n_z (int or None): Number of `z` samples to take for each `x`.
                (default :obj:`None`, one sample without explicit sampling
                dimension)
            mcmc_iteration (int or tf.Tensor): Iteration count for MCMC
                missing data imputation. (default :obj:`None`, no iteration)
            last_point_only (bool): Whether to obtain the reconstruction
                probability of only the last point in each window.
                (default :obj:`True`)

        Returns:
            tf.Tensor: The reconstruction probability, with the shape
                ``(len(x) - self.x_dims + 1,)`` if `last_point_only` is
                :obj:`True`, or ``(len(x) - self.x_dims + 1, self.x_dims)``
                if `last_point_only` is :obj:`False`.  This is because the
                first ``self.x_dims - 1`` points are not the last point of
                any window.
        """
        with tf.name_scope('Donut.get_score'):
            # MCMC missing data imputation
            if y is not None and mcmc_iteration:
                x_r = iterative_masked_reconstruct(
                    reconstruct=self.vae.reconstruct,
                    x=x,
                    mask=y,
                    iter_count=mcmc_iteration,
                    back_prop=False,
                )
            else:
                x_r = x

            # get the reconstruction probability
            q_net = self.vae.variational(x=x_r, n_z=n_z)  # notice: x=x_r
            p_net = self.vae.model(z=q_net['z'], x=x, n_z=n_z)  # notice: x=x
            r_prob = p_net['x'].log_prob(group_ndims=0)
            if n_z is not None:
                n_z = validate_n_samples(n_z, 'n_z')
                assert_shape_op = tf.assert_equal(
                    tf.shape(r_prob),
                    tf.stack([n_z, tf.shape(x)[0], self.x_dims]),
                    message='Unexpected shape of reconstruction prob'
                )
                with tf.control_dependencies([assert_shape_op]):
                    r_prob = tf.reduce_mean(r_prob, axis=0)
            if last_point_only:
                r_prob = r_prob[:, -1]
            return r_prob
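
A rough sketch of how `get_score` might be wired up at detection time; the placeholders, the trained `model` instance, the window size, and the `windows` / `missing_windows` arrays below are all assumptions for illustration, not part of the original code:

    import tensorflow as tf

    # `model` is assumed to be a trained Donut instance with x_dims == 120
    x_ph = tf.placeholder(tf.float32, shape=[None, 120], name='x')
    y_ph = tf.placeholder(tf.int32, shape=[None, 120], name='y')
    score = model.get_score(x_ph, y_ph, n_z=16, mcmc_iteration=10)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # higher scores mean more normal; negate them for anomaly severity
        scores = sess.run(score,
                          feed_dict={x_ph: windows, y_ph: missing_windows})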
Example #3
    def test_dynamic_values(self):
        # type checks
        for o in [
                tf.constant(1.2, dtype=tf.float32),
                tf.constant(LONG_MAX, dtype=tf.int64)
        ]:
            with pytest.raises(TypeError,
                               match='xyz cannot be converted to int32'):
                _ = validate_n_samples(o, 'xyz')

        # value checks
        with self.test_session():
            self.assertEqual(
                validate_n_samples(tf.constant(1, dtype=tf.int32),
                                   'xyz').eval(), 1)
            with pytest.raises(Exception, match='xyz must be positive'):
                _ = validate_n_samples(tf.constant(0, dtype=tf.int32),
                                       'xyz').eval()
            with pytest.raises(Exception, match='xyz must be positive'):
                _ = validate_n_samples(tf.constant(-1, dtype=tf.int32),
                                       'xyz').eval()
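
The dynamic branch has to defer the value check to graph execution time: the dtype is known statically and can be rejected eagerly, while positivity becomes an assertion op that fires on `eval()`. A sketch under those assumptions (not the actual tfsnippet source):

    import tensorflow as tf

    def validate_n_samples_dynamic(value, name):
        """Illustrative sketch of validating a dynamic `n_samples` tensor."""
        # the dtype is a static property, so it can be rejected immediately
        if value.dtype != tf.int32:
            raise TypeError('{} cannot be converted to int32'.format(name))
        # positivity is only known at run time, so attach an assertion op
        assert_positive = tf.assert_positive(
            value, message='{} must be positive'.format(name))
        with tf.control_dependencies([assert_positive]):
            return tf.identity(value)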
Example #4
    def get_training_loss(self, x, n_z=None):
        """
        Get the training loss for this VAE.

        The variational solver is automatically chosen according to
        `z.is_reparameterized`, and the argument `n_z`, by the following rules:

        1. If `z.is_reparameterized` is :obj:`True`, then:

            1. If `n_z` > 1, use `iwae`.
            2. If `n_z` == 1 or `n_z` is :obj:`None`, use `sgvb`.

        2. If `z.is_reparameterized` is :obj:`False`, then:

            1. If `n_z` > 1, use `vimco`.
            2. If `n_z` == 1 or `n_z` is :obj:`None`, use `reinforce`.

        Dynamic `n_z` is not supported by this method, nor is the
        Reweighted Wake-Sleep algorithm.  To derive the training loss
        in either situation, use :meth:`chain` to obtain a
        :class:`~tfsnippet.variational.VariationalChain`, and further
        obtain the loss via `chain.vi.training.[algorithm]`.

        Args:
            x: The input observation `x`.
            n_z (int or None): Number of `z` samples to take.  Must be
                :obj:`None` or a constant integer.  Dynamic tensors are not
                accepted, since we cannot automatically choose a variational
                solver for a non-deterministic `n_z`. (default :obj:`None`)

        Returns:
            tf.Tensor: A 0-d tensor, the training loss which can be optimized
                by gradient descent.

        See Also:
            :class:`tfsnippet.variational.VariationalChain`,
            :class:`tfsnippet.variational.VariationalTrainingObjectives`
        """
        with tf.name_scope('VAE.get_training_loss'):
            if n_z is not None:
                if is_tensor_object(n_z):
                    raise TypeError('Cannot choose the variational solver '
                                    'automatically for dynamic `n_z`')
                n_z = validate_n_samples(n_z, 'n_z')

            # derive the variational chain
            chain = self.chain(x, n_z)
            z = chain.variational['z']

            # auto choose a variational solver for training loss
            if n_z is not None and n_z > 1:
                if z.is_reparameterized:
                    solver = chain.vi.training.iwae
                else:
                    solver = chain.vi.training.vimco
            else:
                if z.is_reparameterized:
                    solver = chain.vi.training.sgvb
                else:
                    solver = chain.vi.training.reinforce

            # derive the training loss
            return tf.reduce_mean(solver())
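
A sketch of wiring this loss into a training step; the `vae` instance, the window size, the placeholder, and the `train_batches` iterator are assumptions for illustration:

    import tensorflow as tf

    x_ph = tf.placeholder(tf.float32, shape=[None, 120], name='x')
    # with n_z > 1 and a reparameterized z, `iwae` is chosen automatically
    loss = vae.get_training_loss(x_ph, n_z=8)
    train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for batch in train_batches:
            _, loss_val = sess.run([train_op, loss], feed_dict={x_ph: batch})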
Example #5
    def get_refactor_probability(self,
                                 window,
                                 missing=None,
                                 n_z=None,
                                 mcmc_iteration=None,
                                 last_point_only=True):
        """
        获得x,y的重构概率
        “重建概率”越大,异常点的可能性就越小。如果想要直接表明异常的严重程度,可以取这个分数的负值。

        Args:
            window (tf.Tensor): 二维32位浮点数:class:`tf.Tensor`, KPI观测的小切片窗口。
            missing (tf.Tensor): 二维32位整型 :class:`tf.Tensor`, 每个小切片中是否有有缺失点的窗口。
            n_z (int or None): 每个“x”要取的“z”样本数。(default :obj:`None`, 一个没有明确抽样维度的样本)
            mcmc_iteration (int or tf.Tensor): 缺失点注入的迭代次数(default :obj:`None`, 不迭代)
            last_point_only (bool): 是否获得窗口最后一个点的重构概率(default :obj:`True`)

        Returns:
            tf.Tensor: 重构概率,
                如果`last_point_only` 是:obj:`True`,shape为 ``(len(x) - self.x_dims + 1,)``
                反之,则为``(len(x) - self.x_dims + 1, self.x_dims)``
                这是因为第一个``self.x_dims - 1`` 点不是任何窗口的最后一个点。
        """
        with tf.name_scope('Donut.get_refactor_probability'):
            # MCMC missing data imputation
            # if missing point indicators are given and MCMC iteration is
            # requested, iteratively reconstruct the missing points
            if missing is not None and mcmc_iteration:
                x_r = iterative_masked_reconstruct(
                    reconstruct=self.vae.reconstruct,
                    x=window,
                    mask=missing,
                    iter_count=mcmc_iteration,
                    back_prop=False,
                )
            # otherwise use the original data
            else:
                x_r = window

            # get the reconstruction probability
            # derive an instance of :math:`q(z|h(x))`, the variational net;
            # take `n_z` samples of `z` for each `x` if `z` is not observed
            q_net = self.vae.variational(x=x_r, n_z=n_z)  # notice: x=x_r
            # derive an instance of :math:`p(x|h(z))`, the model net
            p_net = self.vae.model(z=q_net['z'], x=window,
                                   n_z=n_z)  # notice: x=x
            # compute the log-density of the :class:`StochasticTensor`,
            # overriding the configured `group_ndims` with 0
            r_prob = p_net['x'].log_prob(group_ndims=0)
            # if an explicit number of `z` samples was requested
            if n_z is not None:
                # validate the `n_z` argument
                n_z = validate_n_samples(n_z, 'n_z')
                assert_shape_op = tf.assert_equal(
                    tf.shape(r_prob),
                    tf.stack([n_z, tf.shape(window)[0], self.x_dims]),
                    message='Unexpected shape of reconstruction prob')
                # context manager for control dependencies: ops created inside
                # this block only run after [assert_shape_op] has executed
                # (a wrapper around `Graph.control_dependencies()` that uses
                # the default graph)
                with tf.control_dependencies([assert_shape_op]):
                    # average over the sampling dimension: reducing `r_prob`
                    # along axis 0 removes that axis and lowers the rank by 1
                    r_prob = tf.reduce_mean(r_prob, axis=0)
            # keep only the reconstruction probability of the last point
            # in each window
            if last_point_only:
                r_prob = r_prob[:, -1]
            return r_prob
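
To make the documented return shape concrete, a worked example with assumed numbers:

    # Worked shape example (the numbers are illustrative assumptions):
    # suppose self.x_dims == 120 and the KPI series has 1000 observations,
    # so sliding windows cover 1000 - 120 + 1 = 881 positions.
    # last_point_only=True  -> r_prob has shape (881,)
    # last_point_only=False -> r_prob has shape (881, 120)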