Пример #1
0
    def test_statistic(self, matrix_X, matrix_Y):
        """
        Computes the HHG correlation measure between two datasets.

        Both inputs are first passed through ``compute_distance``. For every ordered
        pair of distinct samples ``(i, j)``, all samples are cross-classified by
        whether their distance to sample ``i`` is at most ``d(i, j)`` in X and in Y.
        A score is computed from the resulting 2x2 table of counts and the scores
        of all pairs are summed.

        :param matrix_X: a [n*p] data matrix, a matrix with n samples in p dimensions
        :type matrix_X: 2D `numpy.array`

        :param matrix_Y: a [n*q] data matrix, a matrix with n samples in q dimensions
        :type matrix_Y: 2D `numpy.array`

        :return: returns two items:

            - :test_statistic_: the summed HHG statistic
            - :test_statistic_metadata_: an empty ``dict`` (HHG produces no metadata)
        :rtype: float, dict

        **Example:**

        >>> import numpy as np
        >>> from mgcpy.independence_tests.hhg import HHG
        >>>
        >>> X = np.array([0.07487683, -0.18073412, 0.37266440, 0.06074847, 0.76899045,
        ...           0.51862516, -0.13480764, -0.54368083, -0.73812644, 0.54910974]).reshape(-1, 1)
        >>> Y = np.array([-1.31741173, -0.41634224, 2.24021815, 0.88317196, 2.00149312,
        ...           1.35857623, -0.06729464, 0.16168344, -0.61048226, 0.41711113]).reshape(-1, 1)
        >>> hhg = HHG()
        >>> hhg_test_stat, hhg_metadata = hhg.test_statistic(X, Y)
        """
        dist_X, dist_Y = compute_distance(matrix_X, matrix_Y, self.compute_distance_matrix)

        n = dist_X.shape[0]
        pair_scores = np.zeros((n, n))

        for center in range(n):
            # distances from the current centre sample to every sample
            row_X = dist_X[center, :]
            row_Y = dist_Y[center, :]
            for other in range(n):
                if center == other:
                    continue

                # masks of samples at most as far from the centre as `other`
                near_X = row_X <= row_X[other]
                near_Y = row_Y <= row_Y[other]
                far_X = np.logical_not(near_X)
                far_Y = np.logical_not(near_Y)

                # 2x2 table of counts; the "near in both" cell is reduced by 2
                # (presumably to drop the two samples of the pair itself)
                both_near = np.sum(near_X & near_Y) - 2
                only_X_near = np.sum(near_X & far_Y)
                only_Y_near = np.sum(far_X & near_Y)
                both_far = np.sum(far_X & far_Y)

                margins = (both_near+only_X_near) * (only_Y_near+both_far) \
                    * (both_near+only_Y_near) * (only_X_near+both_far)
                # a table with an empty margin keeps its score at zero
                if margins > 0:
                    cross_term = only_X_near*only_Y_near - both_near*both_far
                    pair_scores[center, other] = (n-2) * np.power(cross_term, 2) / margins

        # no metadata for HHG
        self.test_statistic_metadata_ = {}
        self.test_statistic_ = np.sum(pair_scores)

        return self.test_statistic_, self.test_statistic_metadata_
Пример #2
0
    def test_statistic(self, matrix_X, matrix_Y, p = None):
        """
        Computes the MGCX measure between two time series datasets.

            - It first computes all the local correlations
            - Then, it returns the maximal statistic among all local correlations based on thresholding.

        :param matrix_X: is interpreted as either:

            - a ``[n*n]`` distance matrix, a square matrix with zeros on diagonal for ``n`` samples OR
            - a ``[n*p]`` data matrix, a matrix with ``n`` samples in ``p`` dimensions
        :type matrix_X: 2D numpy.array

        :param matrix_Y: is interpreted as either:

            - a ``[n*n]`` distance matrix, a square matrix with zeros on diagonal for ``n`` samples OR
            - a ``[n*q]`` data matrix, a matrix with ``n`` samples in ``q`` dimensions
        :type matrix_Y: 2D numpy.array

        :param p: bandwidth parameter for Bartlett Kernel.
        :type p: float

        :return: returns a list of two items, that contains:

            - :test_statistic: the sample mgc_ts statistic (not necessarily within [-1,1])
            - :test_statistic_metadata: a ``dict`` of metadata with the following keys:
                    - :dist_mtx_X: the distance matrix of sample X
                    - :dist_mtx_Y: the distance matrix of sample X
        :rtype: list

        **Example:**

        >>> import numpy as np
        >>> from mgcpy.independence_tests.mgc.mgc import MGC
        >>>
        >>> X = np.array([0.07487683, -0.18073412, 0.37266440, 0.06074847, 0.76899045,
        ...           0.51862516, -0.13480764, -0.54368083, -0.73812644, 0.54910974]).reshape(-1, 1)
        >>> Y = np.array([-1.31741173, -0.41634224, 2.24021815, 0.88317196, 2.00149312,
        ...           1.35857623, -0.06729464, 0.16168344, -0.61048226, 0.41711113]).reshape(-1, 1)
        >>> mgc_ts = MGC_TS()
        >>> mgc_ts_statistic, test_statistic_metadata = mgc.test_statistic(X, Y)
        """
        assert matrix_X.shape[0] == matrix_Y.shape[0], "Matrices X and Y need to be of dimensions [n, p] and [n, q], respectively, where p can be equal to q"

        n = matrix_X.shape[0]
        if len(matrix_X.shape) == 1:
            matrix_X = matrix_X.reshape((n,1))
        if len(matrix_Y.shape) == 1:
            matrix_Y = matrix_Y.reshape((n,1))
        matrix_X, matrix_Y = compute_distance(matrix_X, matrix_Y, self.compute_distance_matrix)

        M = self.max_lag if self.max_lag is not None else math.ceil(math.sqrt(n))
        mgc = self.mgc

        # Collect the test statistic by lag, and sum them for the full test statistic.
        dependence_by_lag = np.zeros(M+1)
        mgc_statistic, mgc_metadata = mgc.test_statistic(matrix_X, matrix_Y)
        dependence_by_lag[0] = np.maximum(0.0, mgc_statistic)
        max_dependence = dependence_by_lag[0]
        optimal_lag = 0
        optimal_scale = mgc_metadata['optimal_scale']

        # TO DO: parallelize?
        for j in range(1,M+1):
            dist_mtx_X = matrix_X[j:n,j:n]
            dist_mtx_Y = matrix_Y[0:(n-j),0:(n-j)]
            mgc_statistic, mgc_metadata = mgc.test_statistic(dist_mtx_X, dist_mtx_Y)
            dependence_by_lag[j] = (n-j)*np.maximum(0.0, mgc_statistic) / n
            if dependence_by_lag[j] > max_dependence:
                max_dependence = dependence_by_lag[j]
                optimal_lag = j
                optimal_scale = mgc_metadata['optimal_scale']


        # Reporting optimal lag
        self.test_statistic_metadata_ = { 'optimal_lag' : optimal_lag,
                                    'optimal_scale' : optimal_scale,
                                    'dependence_by_lag' : dependence_by_lag }
        self.test_statistic_ = np.sum(dependence_by_lag)
        return self.test_statistic_, self.test_statistic_metadata_
Пример #3
0
    def test_statistic(self, matrix_X, matrix_Y, p=None):
        """
        Computes the (summed across lags) cross distance covariance estimate between two time series.

        :param matrix_X: is interpreted as either:

            - a ``[n*n]`` distance matrix, a square matrix with zeros on diagonal for ``n`` samples OR
            - a ``[n*p]`` data matrix, a matrix with ``n`` samples in ``p`` dimensions
        :type matrix_X: 2D numpy.array

        :param matrix_Y: is interpreted as either:

            - a ``[n*n]`` distance matrix, a square matrix with zeros on diagonal for ``n`` samples OR
            - a ``[n*q]`` data matrix, a matrix with ``n`` samples in ``q`` dimensions
        :type matrix_Y: 2D numpy.array

        :param p: bandwidth parameter for Bartlett Kernel.
        :type p: float

        :return: returns a list of two items, that contains:

            - :test_statistic: the sample cdcv statistic (not necessarily within [-1,1])
            - :test_statistic_metadata: a ``dict`` of metadata with the following keys:
                    - :dist_mtx_X: the distance matrix of sample X
                    - :dist_mtx_Y: the distance matrix of sample X
        :rtype: list

        **Example:**

        >>> import numpy as np
        >>> from mgcpy.independence_tests.dcorr import DCorr
        >>>
        >>> X = np.array([0.07487683, -0.18073412, 0.37266440, 0.06074847, 0.76899045,
        ...           0.51862516, -0.13480764, -0.54368083, -0.73812644, 0.54910974]).reshape(-1, 1)
        >>> Y = np.array([-1.31741173, -0.41634224, 2.24021815, 0.88317196, 2.00149312,
        ...           1.35857623, -0.06729464, 0.16168344, -0.61048226, 0.41711113]).reshape(-1, 1)
        >>> cdcv = CDCV(which_test = 'unbiased')
        >>> cdcv_statistic = cdcv.test_statistic(X, Y)
        """
        assert matrix_X.shape[0] == matrix_Y.shape[
            0], "Matrices X and Y need to be of dimensions [n, p] and [n, q], respectively, where p can be different from q"
        if self.which_test == "unbiased" and matrix_X.shape[0] <= 3:
            raise ValueError(
                'Cannot use unbiased estimator of distance covariance with n <= 3.'
            )

        # Represent univariate data as matrices.
        # Use the matrix shape and diagonal elements to determine if the given data is a distance matrix or not.
        n = matrix_X.shape[0]
        if len(matrix_X.shape) == 1:
            matrix_X = matrix_X.reshape((n, 1))
        if len(matrix_Y.shape) == 1:
            matrix_Y = matrix_Y.reshape((n, 1))
        matrix_X, matrix_Y = compute_distance(matrix_X, matrix_Y,
                                              self.compute_distance_matrix)

        M = self.max_lag if self.max_lag is not None else math.ceil(
            math.sqrt(n))
        dcorr = self.dcorr

        # Collect the test statistic by lag, and sum them for the full test statistic.
        dependence_by_lag = np.zeros(M + 1)
        dcorr_statistic, _ = dcorr.test_statistic(matrix_X, matrix_Y)
        dependence_by_lag[0] = np.maximum(0.0, dcorr_statistic)

        # TO DO: parallelize?
        for j in range(1, M + 1):
            dist_mtx_X = matrix_X[j:n, j:n]
            dist_mtx_Y = matrix_Y[0:(n - j), 0:(n - j)]
            dcorr_statistic, _ = dcorr.test_statistic(dist_mtx_X, dist_mtx_Y)
            dependence_by_lag[j] = (n - j) * np.maximum(0.0,
                                                        dcorr_statistic) / n

        # Reporting optimal lag
        optimal_lag = np.argmax(dependence_by_lag)
        test_statistic_metadata = {
            'optimal_lag': optimal_lag,
            'dependence_by_lag': dependence_by_lag
        }
        self.test_statistic_ = np.sum(dependence_by_lag)
        self.test_statistic_metadata_ = test_statistic_metadata
        return self.test_statistic_, test_statistic_metadata
Пример #4
0
    def test_statistic(self,
                       matrix_X,
                       matrix_Y,
                       is_fast=False,
                       fast_dcorr_data={}):
        """
        Computes the distance correlation between two datasets.

        :param matrix_X: is interpreted as either:

            - a ``[n*n]`` distance matrix, a square matrix with zeros on diagonal for ``n`` samples OR
            - a ``[n*d]`` data matrix, a matrix with ``n`` samples in ``p`` dimensions
        :type matrix_X: 2D numpy.array

        :param matrix_Y: is interpreted as either:

            - a ``[n*n]`` distance matrix, a square matrix with zeros on diagonal for ``n`` samples OR
            - a ``[n*d]`` data matrix, a matrix with ``n`` samples in ``q`` dimensions
        :type matrix_Y: 2D numpy.array

        :param is_fast: is a boolean flag which specifies if the test_statistic should be computed (approximated)
                        using the fast version of dcorr. This defaults to False.
        :type is_fast: boolean

        :param fast_dcorr_data: a ``dict`` of fast dcorr params, refer: self._fast_dcorr_test_statistic

            - :sub_samples: specifies the number of subsamples.
        :type fast_dcorr_data: dictonary

        :return: returns a list of two items, that contains:

            - :test_statistic: the sample dcorr statistic within [-1, 1]
            - :independence_test_metadata: a ``dict`` of metadata with the following keys:
                    - :variance_X: the variance of the data matrix X
                    - :variance_Y: the variance of the data matrix Y
        :rtype: list

        **Example:**

        >>> import numpy as np
        >>> from mgcpy.independence_tests.dcorr import DCorr
        >>>
        >>> X = np.array([0.07487683, -0.18073412, 0.37266440, 0.06074847, 0.76899045,
        ...           0.51862516, -0.13480764, -0.54368083, -0.73812644, 0.54910974]).reshape(-1, 1)
        >>> Y = np.array([-1.31741173, -0.41634224, 2.24021815, 0.88317196, 2.00149312,
        ...           1.35857623, -0.06729464, 0.16168344, -0.61048226, 0.41711113]).reshape(-1, 1)
        >>> dcorr = DCorr(which_test = 'unbiased')
        >>> dcorr_statistic, test_statistic_metadata = dcorr.test_statistic(X, Y)
        """
        assert matrix_X.shape[0] == matrix_Y.shape[
            0], "Matrices X and Y need to be of dimensions [n, p] and [n, q], respectively, where p can be equal to q"

        if is_fast:
            test_statistic, test_statistic_metadata = self._fast_dcorr_test_statistic(
                matrix_X, matrix_Y, **fast_dcorr_data)
        else:
            matrix_X, matrix_Y = compute_distance(matrix_X, matrix_Y,
                                                  self.compute_distance_matrix)

            # perform distance transformation
            # transformed_dist_mtx_X, transformed_dist_mtx_Y = dist_transform(matrix_X, matrix_Y, self.which_test)

            transformed_distance_matrices = transform_distance_matrix(
                matrix_X,
                matrix_Y,
                base_global_correlation=self.which_test,
                is_ranked=False)
            transformed_dist_mtx_X = transformed_distance_matrices[
                'centered_distance_matrix_A']
            transformed_dist_mtx_Y = transformed_distance_matrices[
                'centered_distance_matrix_B']

            # transformed_dist_mtx need not be symmetric
            covariance = self.compute_global_covariance(
                transformed_dist_mtx_X, np.transpose(transformed_dist_mtx_Y))
            variance_X = self.compute_global_covariance(
                transformed_dist_mtx_X, np.transpose(transformed_dist_mtx_X))
            variance_Y = self.compute_global_covariance(
                transformed_dist_mtx_Y, np.transpose(transformed_dist_mtx_Y))

            # check the case when one of the dataset has zero variance
            if variance_X <= 0 or variance_Y <= 0:
                correlation = 0
            else:
                if self.is_paired:
                    n = transformed_dist_mtx_X.shape[0]
                    correlation = (variance_X/n/(n-1)) + (variance_Y/n/(n-1)) \
                        - 2*np.sum(np.multiply(transformed_dist_mtx_X, np.transpose(transformed_dist_mtx_Y)).diagonal())/n
                else:
                    correlation = covariance / np.real(
                        np.sqrt(variance_X * variance_Y))

            # store the variance of X, variance of Y and the covariace as metadata
            test_statistic_metadata = {
                'variance_X': variance_X,
                'variance_Y': variance_Y,
                'covariance': covariance
            }

            # use absolute value for mantel coefficients

            if self.which_test == 'mantel':
                test_statistic = np.abs(correlation)
            else:
                test_statistic = correlation

        self.test_statistic_ = test_statistic
        self.test_statistic_metadata_ = test_statistic_metadata
        return test_statistic, test_statistic_metadata
Пример #5
0
    def p_value_block(self, matrix_X, matrix_Y, replication_factor=1000):
        """
        Tests independence between two datasets using block permutation test.

        The observed statistic is compared against a null distribution obtained by
        rebuilding Y from randomly chosen contiguous blocks of ``ceil(sqrt(n))``
        samples (indices wrap around modulo ``n``) and recomputing ``self.test_statistic``.

        :param matrix_X: is interpreted as either:

            - a ``[n*n]`` distance matrix, a square matrix with zeros on diagonal for ``n`` samples OR
            - a ``[n*p]`` data matrix, a matrix with ``n`` samples in ``p`` dimensions
        :type matrix_X: 2D numpy.array

        :param matrix_Y: is interpreted as either:

            - a ``[n*n]`` distance matrix, a square matrix with zeros on diagonal for ``n`` samples OR
            - a ``[n*q]`` data matrix, a matrix with ``n`` samples in ``q`` dimensions
        :type matrix_Y: 2D numpy.array

        :param replication_factor: specifies the number of replications to use for
                                   the permutation test. Defaults to ``1000``.
        :type replication_factor: integer

        :return: returns two items:

            - :p_value: fraction of null statistics strictly greater than the observed one;
                        a value of zero is replaced by ``1 / replication_factor``
            - :metadata: a ``dict`` of metadata with the following keys:
                    - :null_distribution: numpy array representing distribution of test statistic under null.
        :rtype: tuple

        **Example:**

        >>> import numpy as np
        >>> from mgcpy.independence_tests.mgc.mgc_ts import MGC_TS
        >>>
        >>> X = np.array([0.07487683, -0.18073412, 0.37266440, 0.06074847, 0.76899045,
        ...           0.51862516, -0.13480764, -0.54368083, -0.73812644, 0.54910974]).reshape(-1, 1)
        >>> Y = np.array([-1.31741173, -0.41634224, 2.24021815, 0.88317196, 2.00149312,
        ...           1.35857623, -0.06729464, 0.16168344, -0.61048226, 0.41711113]).reshape(-1, 1)
        >>> mgc_ts = MGC_TS()
        >>> p_value, metadata = mgc_ts.p_value_block(X, Y, replication_factor = 100)
        """
        assert matrix_X.shape[0] == matrix_Y.shape[0], "Matrices X and Y need to be of dimensions [n, p] and [n, q], respectively, where p can be equal to q"

        # Compute test statistic
        n = matrix_X.shape[0]
        if len(matrix_X.shape) == 1:
            matrix_X = matrix_X.reshape((n, 1))
        if len(matrix_Y.shape) == 1:
            matrix_Y = matrix_Y.reshape((n, 1))
        matrix_X, matrix_Y = compute_distance(matrix_X, matrix_Y, self.compute_distance_matrix)
        test_statistic, _ = self.test_statistic(matrix_X, matrix_Y)

        # Block bootstrap
        block_size = int(np.ceil(np.sqrt(n)))
        block_offsets = np.arange(block_size)
        test_stats_null = np.zeros(replication_factor)
        for rep in range(replication_factor):
            # Generate new time series sample for Y: draw enough random block
            # starts to cover n samples, expand each into a run of consecutive
            # indices, trim to n and wrap indices that run past the end.
            block_starts = np.random.choice(n, n // block_size + 1)
            permuted_indices = (block_starts[:, np.newaxis] + block_offsets).ravel()[:n]
            permuted_indices = np.mod(permuted_indices, n)
            permuted_Y = matrix_Y[np.ix_(permuted_indices, permuted_indices)]

            # Compute test statistic
            test_stats_null[rep], _ = self.test_statistic(matrix_X, permuted_Y)

        p_value = np.sum(np.greater(test_stats_null, test_statistic)) / replication_factor
        # A permutation p-value of exactly zero is reported as the smallest
        # value resolvable with this many replications.
        if p_value == 0.0:
            p_value = 1 / replication_factor
        self.p_value_ = p_value
        self.p_value_metadata_ = {'null_distribution': test_stats_null}

        return self.p_value_, self.p_value_metadata_
Пример #6
0
    def test_statistic(self, matrix_X, matrix_Y, permutations=0, individual=0, disttype='cityblock'):
        """
        Computes MDMR Pseudo-F statistic between two datasets.

        - It first computes the distance matrix of Y (via ``compute_distance``) and Gower-centers it
        - Next it builds the permuted hat-matrix terms for X (with an intercept column prepended)
        - The pseudo-F statistic is computed from those terms; row 0 of the permutation
          table is the identity ordering, i.e. the observed data

        :param matrix_X: is interpreted as:

            - a ``[n*d]`` data matrix, a matrix with n samples in d dimensions
        :type matrix_X: 2D `numpy.array`

        :param matrix_Y: is interpreted as:

            - a ``[n*d]`` data matrix, a matrix with n samples in d dimensions
        :type matrix_Y: 2D `numpy.array`

        :param permutations: number of random permutations of the subjects generated in
                             addition to the identity ordering. Defaults to ``0``.
        :type permutations: integer

        :param individual:

            - integer, `0` or `1`
            - with value `0` tests the entire X matrix (default)
            - with value `1` tests the entire X matrix and then each predictor variable individually
        :type individual: integer

        :param disttype: accepted but not used by this method.
        :type disttype: string

        :return: two items:

            - with individual = `0`: the test statistic of the entire X matrix and
              ``self.test_statistic_metadata_`` (not assigned by this method)
            - with individual = `1`: the test statistic of the entire X matrix and an array with
              the (1-based) variable of X in the first column, its test statistic in the second,
              and its permutation p-value in the third (``p_vals`` is ``None`` when ``permutations`` is 0)

            Any other value of ``individual`` falls through and returns ``None``.
        :rtype: tuple

        :raises Exception: if the number of rows of X does not match the size of Y's distance matrix
        """
        X = matrix_X
        Y = matrix_Y

        # calculate distance matrix of Y and flatten it into a single column
        D, _ = compute_distance(Y, np.identity(1), self.compute_distance_matrix)
        a = D.shape[0]**2
        D = D.reshape((a, 1))

        predictors = np.arange(X.shape[1])
        predsingle = X.shape[1]
        check_rank(X)

        # check number of subjects compatible
        subjects = X.shape[0]
        if subjects != np.sqrt(D.shape[0]):
            raise Exception("# of subjects incompatible between X and D")

        # prepend the intercept column; predictor indexes shift by one accordingly
        X = np.hstack((np.ones((X.shape[0], 1)), X))
        predictors = np.array(predictors)
        predictors += 1

        # Gower Center the distance matrix of Y
        Gs = gower_center_many(D)

        m2 = float(X.shape[1] - predictors.shape[0])
        nm = float(subjects - X.shape[1])

        # form permutation indexes (row 0 is the identity ordering);
        # the builtin ``int`` replaces the ``np.int`` alias removed from NumPy
        permutation_indexes = np.zeros((permutations + 1, subjects), dtype=int)
        permutation_indexes[0, :] = range(subjects)
        for i in range(1, permutations + 1):
            permutation_indexes[i, :] = np.random.permutation(subjects)

        H2perms = gen_H2_perms(X, predictors, permutation_indexes)
        IHperms = gen_IH_perms(X, predictors, permutation_indexes)

        # Calculate test statistic
        F_perms = calc_ftest(H2perms, IHperms, Gs, m2, nm)

        # Calculate p-value
        p_vals = None
        if permutations > 0:
            p_vals = fperms_to_pvals(F_perms)
        F_permtotal = F_perms[0, :]
        self.test_statistic_ = F_permtotal
        if individual == 0:
            return self.test_statistic_, self.test_statistic_metadata_

        # code for individual test
        if individual == 1:
            results = np.zeros((predsingle, 3))
            for column in range(1, predsingle+1):
                # one-element index array selecting a single predictor column of X
                predictor = np.array([column])

                Gs = gower_center_many(D)

                m2 = float(X.shape[1] - predictor.shape[0])
                nm = float(subjects - X.shape[1])

                permutation_indexes = np.zeros((permutations + 1, subjects), dtype=int)
                permutation_indexes[0, :] = range(subjects)
                for i in range(1, permutations + 1):
                    permutation_indexes[i, :] = np.random.permutation(subjects)

                H2perms = gen_H2_perms(X, predictor, permutation_indexes)
                IHperms = gen_IH_perms(X, predictor, permutation_indexes)

                F_perms = calc_ftest(H2perms, IHperms, Gs, m2, nm)

                p_vals = None
                if permutations > 0:
                    p_vals = fperms_to_pvals(F_perms)
                results[predictor-1, 0] = predictor
                results[predictor-1, 1] = F_perms[0, :]
                results[predictor-1, 2] = p_vals

            return F_permtotal, results
Пример #7
0
    def test_statistic(self,
                       matrix_X,
                       matrix_Y,
                       is_fast=False,
                       fast_mgc_data={}):
        """
        Computes the MGC measure between two datasets.

            - It first computes all the local correlations
            - Then, it returns the maximal statistic among all local correlations based on thresholding.

        :param matrix_X: is interpreted as either:

            - a ``[n*n]`` distance matrix, a square matrix with zeros on diagonal for ``n`` samples OR
            - a ``[n*p]`` data matrix, a matrix with ``n`` samples in ``p`` dimensions
        :type matrix_X: 2D numpy.array

        :param matrix_Y: is interpreted as either:

            - a ``[n*n]`` distance matrix, a square matrix with zeros on diagonal for ``n`` samples OR
            - a ``[n*q]`` data matrix, a matrix with ``n`` samples in ``q`` dimensions
        :type matrix_Y: 2D numpy.array

        :param is_fast: is a boolean flag which specifies if the test_statistic should be computed (approximated)
                        using the fast version of mgc. This defaults to False.
        :type is_fast: boolean

        :param fast_mgc_data: a ``dict`` of fast mgc params, refer: self._fast_mgc_test_statistic

            - :sub_samples: specifies the number of subsamples.
        :type fast_mgc_data: dictonary

        :return: returns a list of two items, that contains:

            - :test_statistic: the sample MGC statistic within [-1, 1]
            - :independence_test_metadata: a ``dict`` of metadata with the following keys:
                    - :local_correlation_matrix: a 2D matrix of all local correlations within ``[-1,1]``
                    - :optimal_scale: the estimated optimal scale as an ``[x, y]`` pair.
        :rtype: list

        **Example:**

        >>> import numpy as np
        >>> from mgcpy.independence_tests.mgc.mgc import MGC
        >>>
        >>> X = np.array([0.07487683, -0.18073412, 0.37266440, 0.06074847, 0.76899045,
        ...           0.51862516, -0.13480764, -0.54368083, -0.73812644, 0.54910974]).reshape(-1, 1)
        >>> Y = np.array([-1.31741173, -0.41634224, 2.24021815, 0.88317196, 2.00149312,
        ...           1.35857623, -0.06729464, 0.16168344, -0.61048226, 0.41711113]).reshape(-1, 1)
        >>> mgc = MGC()
        >>> mgc_statistic, test_statistic_metadata = mgc.test_statistic(X, Y)
        """
        assert matrix_X.shape[0] == matrix_Y.shape[
            0], "Matrices X and Y need to be of dimensions [n, p] and [n, q], respectively, where p can be equal to q"

        if is_fast:
            mgc_statistic, test_statistic_metadata = self._fast_mgc_test_statistic(
                matrix_X, matrix_Y, **fast_mgc_data)
        else:
            distance_matrix_X, distance_matrix_Y = compute_distance(
                matrix_X, matrix_Y, self.compute_distance_matrix)
            local_correlation_matrix = local_correlations(
                distance_matrix_X,
                distance_matrix_Y,
                base_global_correlation=self.base_global_correlation
            )["local_correlation_matrix"]
            m, n = local_correlation_matrix.shape
            if m == 1 or n == 1:
                mgc_statistic = local_correlation_matrix[m - 1][n - 1]
                optimal_scale = m * n
            else:
                sample_size = len(matrix_X) - 1  # sample size minus 1

                # find a connected region of significant local correlations, by thresholding
                significant_connected_region = threshold_local_correlations(
                    local_correlation_matrix, sample_size)

                # find the maximum within the significant region
                result = smooth_significant_local_correlations(
                    significant_connected_region, local_correlation_matrix)
                mgc_statistic, optimal_scale = result["mgc_statistic"], result[
                    "optimal_scale"]

            test_statistic_metadata = {
                "local_correlation_matrix": local_correlation_matrix,
                "optimal_scale": optimal_scale
            }

        self.test_statistic_ = mgc_statistic
        self.test_statistic_metadata_ = test_statistic_metadata
        return mgc_statistic, test_statistic_metadata