Exemplo n.º 1
0
    def score(self, x: Union[np.ndarray, list]) -> Tuple[float, float, float]:
        """
        Compute the p-value resulting from a permutation test using the maximum mean discrepancy
        as a distance measure between the reference data and the data to be tested. The kernel
        used within the MMD is first trained to maximise an estimate of the resulting test power.

        Calling this method (re)trains `self.kernel` in place; when `self.retrain_from_scratch`
        is set, training starts from a fresh copy of `self.original_kernel`.

        Parameters
        ----------
        x
            Batch of instances.

        Returns
        -------
        p-value obtained from the permutation test, the MMD^2 between the reference and test set,
        and the MMD^2 threshold above which drift is flagged. All three are Python floats.
        """
        x_ref, x_cur = self.preprocess(x)
        # The `*_tr` splits are used to fit the kernel, the `*_te` splits for the permutation test.
        (x_ref_tr, x_cur_tr), (x_ref_te, x_cur_te) = self.get_splits(x_ref, x_cur)
        dl_ref_tr = self.dataloader(self.dataset(x_ref_tr))
        dl_cur_tr = self.dataloader(self.dataset(x_cur_tr))

        if self.retrain_from_scratch:
            self.kernel = deepcopy(self.original_kernel)
        self.kernel = self.kernel.to(self.device)
        train_args = [self.j_hat, (dl_ref_tr, dl_cur_tr), self.device]
        LearnedKernelDriftTorch.trainer(*train_args, **self.train_kwargs)  # type: ignore

        # Stack reference and test instances; non-array inputs are concatenated with `+`.
        if isinstance(x_ref_te, np.ndarray) and isinstance(x_cur_te, np.ndarray):
            x_all = np.concatenate([x_ref_te, x_cur_te], axis=0)
        else:
            x_all = x_ref_te + x_cur_te
        kernel_mat = self.kernel_mat_fn(x_all, x_all, self.kernel)
        # Zero the diagonal once here so every call below can pass zero_diag=False.
        kernel_mat = kernel_mat - torch.diag(kernel_mat.diag())
        n_cur = len(x_cur_te)
        mmd2 = mmd2_from_kernel_matrix(kernel_mat, n_cur, permute=False, zero_diag=False)
        mmd2_permuted = torch.Tensor([
            mmd2_from_kernel_matrix(kernel_mat, n_cur, permute=True, zero_diag=False)
            for _ in range(self.n_permutations)
        ])
        # Move to host unconditionally: `.cpu()` returns the tensor itself when it is already
        # on the CPU, and this also covers accelerators other than CUDA.
        mmd2, mmd2_permuted = mmd2.cpu(), mmd2_permuted.cpu()
        # Fraction of permuted statistics that are at least as large as the observed one.
        p_val = (mmd2 <= mmd2_permuted).float().mean()

        # Threshold is the permuted MMD^2 at the `self.p_val` position of the descending sort;
        # clamp the index so that `self.p_val == 1.0` does not index one past the end.
        n_perm = len(mmd2_permuted)
        idx_threshold = min(int(self.p_val * n_perm), n_perm - 1)
        distance_threshold = torch.sort(mmd2_permuted, descending=True).values[idx_threshold]
        # `.item()` yields Python floats, matching the annotated return type.
        return p_val.item(), mmd2.item(), distance_threshold.item()
Exemplo n.º 2
0
    def score(self, x: Union[np.ndarray, list]) -> Tuple[float, float, float]:
        """
        Compute the p-value resulting from a permutation test using the maximum mean discrepancy
        as a distance measure between the reference data and the data to be tested.

        Parameters
        ----------
        x
            Batch of instances.

        Returns
        -------
        p-value obtained from the permutation test, the MMD^2 between the reference and test set,
        and the MMD^2 threshold above which drift is flagged. All three are Python floats.
        """
        x_ref, x = self.preprocess(x)
        x_ref = torch.from_numpy(x_ref).to(self.device)  # type: ignore[assignment]
        x = torch.from_numpy(x).to(self.device)  # type: ignore[assignment]
        # compute kernel matrix, MMD^2 and apply permutation test using the kernel matrix
        n = x.shape[0]
        kernel_mat = self.kernel_matrix(x_ref, x)  # type: ignore[arg-type]
        # Zero the diagonal once here so every call below can pass zero_diag=False.
        kernel_mat = kernel_mat - torch.diag(kernel_mat.diag())
        mmd2 = mmd2_from_kernel_matrix(kernel_mat, n, permute=False, zero_diag=False)
        mmd2_permuted = torch.Tensor([
            mmd2_from_kernel_matrix(kernel_mat, n, permute=True, zero_diag=False)
            for _ in range(self.n_permutations)
        ])
        # Move to host unconditionally: `.cpu()` returns the tensor itself when it is already
        # on the CPU, and this also covers accelerators other than CUDA.
        mmd2, mmd2_permuted = mmd2.cpu(), mmd2_permuted.cpu()
        # Fraction of permuted statistics that are at least as large as the observed one.
        p_val = (mmd2 <= mmd2_permuted).float().mean()
        # compute distance threshold: the permuted MMD^2 at the `self.p_val` position of the
        # descending sort; clamp the index so `self.p_val == 1.0` stays within bounds.
        n_perm = len(mmd2_permuted)
        idx_threshold = min(int(self.p_val * n_perm), n_perm - 1)
        distance_threshold = torch.sort(mmd2_permuted, descending=True).values[idx_threshold]
        # `.item()` yields Python floats, matching the annotated return type.
        return p_val.item(), mmd2.item(), distance_threshold.item()
Exemplo n.º 3
0
    def score(self, x: Union[np.ndarray,
                             list]) -> Tuple[float, float, np.ndarray]:
        """
        Compute the p-value resulting from a permutation test using the maximum mean discrepancy
        as a distance measure between the reference data and the data to be tested.

        Parameters
        ----------
        x
            Batch of instances.

        Returns
        -------
        p-value obtained from the permutation test, the MMD^2 between the reference and test set
        and the MMD^2 values from the permutation test (one entry per permutation).
        """
        x_ref, x = self.preprocess(x)
        x_ref = torch.from_numpy(x_ref).to(self.device)
        x = torch.from_numpy(x).to(self.device)
        # compute kernel matrix, MMD^2 and apply permutation test using the kernel matrix
        n = x.shape[0]
        kernel_mat = self.kernel_matrix(x_ref, x)
        # Zero the diagonal once here so every call below can pass zero_diag=False.
        kernel_mat = kernel_mat - torch.diag(kernel_mat.diag())
        mmd2 = mmd2_from_kernel_matrix(kernel_mat, n, permute=False, zero_diag=False)
        mmd2_permuted = torch.Tensor([
            mmd2_from_kernel_matrix(kernel_mat, n, permute=True, zero_diag=False)
            for _ in range(self.n_permutations)
        ])
        # Move to host unconditionally: `.cpu()` returns the tensor itself when it is already
        # on the CPU, and this also covers accelerators other than CUDA.
        mmd2, mmd2_permuted = mmd2.cpu(), mmd2_permuted.cpu()
        # Fraction of permuted statistics that are at least as large as the observed one.
        p_val = (mmd2 <= mmd2_permuted).float().mean()
        return p_val.item(), mmd2.item(), mmd2_permuted.numpy()