Exemplo n.º 1
0
 def test_partial_corr(self):
     """Exercise partial_corr as a function and as a DataFrame method.

     The asserted r-values are reference values from the R package ppcor
     and from JASP.
     """
     df = read_dataset('partial_corr')
     xy = {'x': 'x', 'y': 'y'}
     three_cv = ('cv1', 'cv2', 'cv3')

     def pearson_r3(res):
         # Pearson r rounded to the precision of the reference values.
         return round(res.at['pearson', 'r'], 3)

     # ---- Partial correlation ----
     assert pearson_r3(partial_corr(data=df, covar='cv1', **xy)) == 0.568
     # Spearman is only smoke-tested: the result is slightly different
     # from the ppcor package (0.578), possibly because Python and R do
     # not compute ranks the same way.
     df.partial_corr(covar='cv1', method='spearman', **xy)
     # Covariates given as a list: a single one, then several
     partial_corr(data=df, covar=['cv1'], **xy)
     multi = partial_corr(data=df, covar=list(three_cv), **xy)
     assert pearson_r3(multi) == 0.493
     partial_corr(data=df, covar=list(three_cv), method='percbend', **xy)

     # ---- Semi-partial correlation ----
     df.partial_corr(y_covar='cv1', **xy)
     semi_x = df.partial_corr(x_covar=list(three_cv), **xy)
     assert pearson_r3(semi_x) == 0.463
     semi_y = df.partial_corr(y_covar=list(three_cv), **xy)
     assert pearson_r3(semi_y) == 0.421
     partial_corr(data=df, x_covar='cv1', y_covar=['cv2', 'cv3'],
                  method='spearman', **xy)

     # ---- Invalid argument combinations ----
     with pytest.raises(ValueError):
         partial_corr(data=df, covar='cv2', x_covar='cv1', **xy)
     with pytest.raises(AssertionError) as excinfo:
         partial_corr(data=df, x='cv1', y='y', covar=['cv1', 'cv2'])
     assert str(excinfo.value) == "x and covar must be independent"
Exemplo n.º 2
0
 def test_partial_corr(self):
     """Test function partial_corr on simulated multivariate-normal data.

     The random seed is fixed so that the generated sample, and therefore
     the reference value asserted below, is reproducible.
     """
     np.random.seed(123)
     mean, cov = [4, 6, 2], [(1, .5, .3), (.5, 1, .2), (.3, .2, 1)]
     x, y, z = np.random.multivariate_normal(mean, cov, size=30).T
     df = pd.DataFrame({'x': x, 'y': y, 'z': z})
     stats = partial_corr(data=df, x='x', y='y', covar='z')
     # Compare with R ppcor. Round before comparing: an exact ``==`` on a
     # computed float only holds if the function happens to pre-round r.
     assert round(stats.loc['pearson', 'r'], 3) == 0.568
     df['w'] = np.random.normal(size=30)
     df['v'] = np.random.normal(size=30)
     # Partial correlation of x and y controlling for z, w and v
     # (smoke tests: the calls only need to run without raising)
     partial_corr(data=df, x='x', y='y', covar=['z'])
     partial_corr(data=df, x='x', y='y', covar=['z', 'w', 'v'])
     partial_corr(data=df, x='x', y='y', covar=['z', 'w', 'v'],
                  method='spearman')
Exemplo n.º 3
0
def _partial_corr(self, x=None, y=None, covar=None, x_covar=None,
                  y_covar=None, tail='two-sided', method='pearson'):
    """Partial and semi-partial correlation, using ``self`` as the data.

    Thin wrapper: ``self`` is passed as ``data`` and every other argument
    is forwarded by keyword to ``partial_corr``, whose result is returned
    unchanged.
    """
    forwarded = dict(x=x, y=y, covar=covar, x_covar=x_covar,
                     y_covar=y_covar, tail=tail, method=method)
    return partial_corr(data=self, **forwarded)
Exemplo n.º 4
0
def pairwise_corr(data,
                  columns=None,
                  covar=None,
                  tail='two-sided',
                  method='pearson',
                  padjust='none',
                  export_filename=None):
    '''Pairwise (partial) correlations between columns of a pandas dataframe.

    Parameters
    ----------
    data : pandas DataFrame
        DataFrame. Note that this function can also directly be used as a
        Pandas method, in which case this argument is no longer needed.
        Only numeric columns with at least two unique non-NaN values are
        used.
    columns : list or str
        Column names in data ::

        '["a", "b", "c"]' : combination between columns a, b, and c
        '["a"]' : product between a and all the other numeric columns
        '[["a"], ["b", "c"]]' : product between ["a"] and ["b", "c"]
        '[["a", "d"], ["b", "c"]]' : product between ["a", "d"] and ["b", "c"]
        '[["a", "d"], None]' : product between ["a", "d"] and all other columns

        Note that if column is not specified, then the function will return the
        pairwise correlation between the combination of all the numeric columns
        in data. See the examples section for more details on this.
        The list passed by the caller is never modified.
    covar : None, string or list
        Covariate(s) for partial correlation. Must be one or more columns
        in data. Use a list if there are more than one covariate. If
        ``covar`` is not None, a partial correlation will be computed using
        :py:func:`pingouin.partial_corr` function. Pairs in which X or Y is
        itself one of the covariates are dropped.
    tail : string
        Indicates whether to return the 'two-sided' or 'one-sided' p-values
    method : string
        Specify which method to use for the computation of the correlation
        coefficient. Available methods are ::

        'pearson' : Pearson product-moment correlation
        'spearman' : Spearman rank-order correlation
        'kendall' : Kendall's tau (ordinal data)
        'percbend' : percentage bend correlation (robust)
        'shepherd' : Shepherd's pi correlation (robust Spearman)
    padjust : string
        Method used for testing and adjustment of pvalues. Ignored when
        there is only one pair of columns. Available methods are ::

        'none' : no correction
        'bonferroni' : one-step Bonferroni correction
        'holm' : step-down method using Bonferroni adjustments
        'fdr_bh' : Benjamini/Hochberg FDR correction
        'fdr_by' : Benjamini/Yekutieli FDR correction
    export_filename : string
        Filename (without extension) for the output file.
        If None, do not export the table.
        By default, the file will be created in the current python console
        directory. To change that, specify the filename with full path.

    Returns
    -------
    stats : DataFrame
        Stats summary, one row per pair of columns. Columns that are
        entirely empty are removed from the output ::

        'X' : Name(s) of first columns
        'Y' : Name(s) of second columns
        'method' : method used to compute the correlation
        'covar' : List of specified covariate(s) (only for partial correlation)
        'tail' : indicates whether the p-values are one-sided or two-sided
        'n' : Sample size (after NaN removal)
        'outliers' : outliers column of the correlation output, if any
        'r' : Correlation coefficients
        'CI95%' : 95% parametric confidence intervals
        'r2' : R-squared values
        'adj_r2' : Adjusted R-squared values
        'z' : Standardized correlation coefficients
        'p-unc' : uncorrected one or two tailed p-values
        'p-corr' : corrected one or two tailed p-values
        'p-adjust' : Correction method
        'BF10' : Bayes Factor column of the correlation output, if any
        'power' : power column of the correlation output

    Raises
    ------
    ValueError
        If ``tail`` is not 'one-sided' or 'two-sided', if a single requested
        column is missing from data or is not numeric, or if no valid pair
        of columns is left to correlate.
    AssertionError
        If ``covar`` is not a string or a list, if a covariate is not a
        numeric column of data, or if data has multi-index columns and
        ``columns`` contains something other than tuples (or None).

    Notes
    -----
    Please refer to the :py:func:`pingouin.corr()` function for a description
    of the different methods. NaN are automatically removed from the data.

    This function is more flexible and gives a much more detailed
    output than the :py:func:`pandas.DataFrame.corr()` method (i.e. p-values,
    confidence interval, Bayes Factor..). This comes however at
    an increased computational cost. While this should not be discernible for
    dataframe with less than 10,000 rows and/or less than 20 columns, this
    function can be slow for very large dataset. For speed purpose, the Bayes
    Factor is only computed when the sample size is less than 1000
    (and method='pearson').

    This function also works with two-dimensional multi-index columns. In this
    case, columns must be list(s) of tuple(s). See the Jupyter notebook
    for more details:
    https://github.com/raphaelvallat/pingouin/blob/master/notebooks/04_Correlations.ipynb

    If ``covar`` is specified, this function will compute the pairwise partial
    correlation between the variables. If you are only interested in computing
    the partial correlation matrix (i.e. the raw pairwise partial correlation
    coefficient matrix, without the p-values, sample sizes, etc), a better
    alternative is to use the :py:func:`pingouin.pcorr` function (see
    example 7).

    Examples
    --------
    1. One-tailed spearman correlation corrected for multiple comparisons

    >>> from pingouin import pairwise_corr, read_dataset
    >>> data = read_dataset('pairwise_corr').iloc[:, 1:]
    >>> pairwise_corr(data, method='spearman', tail='one-sided',
    ...               padjust='bonf')  # doctest: +SKIP

    2. Robust two-sided correlation with uncorrected p-values

    >>> pcor = pairwise_corr(data, columns=['Openness', 'Extraversion',
    ...                                     'Neuroticism'], method='percbend')

    3. One-versus-all pairwise correlations

    >>> pairwise_corr(data, columns=['Neuroticism'])  # doctest: +SKIP

    4. Pairwise correlations between two lists of columns (cartesian product)

    >>> columns = [['Neuroticism', 'Extraversion'], ['Openness']]
    >>> pairwise_corr(data, columns)   # doctest: +SKIP

    5. As a Pandas method

    >>> pcor = data.pairwise_corr(covar='Neuroticism', method='spearman')

    6. Pairwise partial correlation

    >>> pcor = pairwise_corr(data, covar='Neuroticism')  # One covariate
    >>> pcor = pairwise_corr(data, covar=['Neuroticism', 'Openness'])  # Two

    7. Pairwise partial correlation matrix (only the r-values)

    >>> data[['Neuroticism', 'Openness', 'Extraversion']].pcorr()
                  Neuroticism  Openness  Extraversion
    Neuroticism      1.000000  0.092097     -0.360421
    Openness         0.092097  1.000000      0.281312
    Extraversion    -0.360421  0.281312      1.000000
    '''
    from pingouin.correlation import corr, partial_corr

    if tail not in ['one-sided', 'two-sided']:
        raise ValueError('Tail not recognized')

    # Keep only numeric columns
    data = data._get_numeric_data()
    # Remove columns with constant value and/or NaN
    data = data.loc[:, data.nunique(dropna=True) >= 2]
    # Extract columns names
    keys = data.columns.tolist()

    # First ensure that columns is a list
    if isinstance(columns, (str, tuple)):
        columns = [columns]

    def traverse(o, tree_types=(list, tuple)):
        """Helper function to flatten nested lists.
        From https://stackoverflow.com/a/6340578
        """
        if isinstance(o, tree_types):
            for value in o:
                for subvalue in traverse(value, tree_types):
                    yield subvalue
        else:
            yield o

    # Check if columns index has multiple levels. Use the public
    # ``pd.MultiIndex`` name: the private ``pd.core.index`` module does not
    # exist in recent pandas versions.
    if isinstance(data.columns, pd.MultiIndex):
        multi_index = True
        if columns is not None:
            # Simple List with one element: [('L0', 'L1')]
            # Simple list with >= 2 elements: [('L0', 'L1'), ('L0', 'L2')]
            # Nested lists: [[('L0', 'L1')], ...] or [..., [('L0', 'L1')]]
            col_flatten = list(traverse(columns, tree_types=list))
            assert all(isinstance(c, (tuple, type(None))) for c in col_flatten)
    else:
        multi_index = False

    # Then define combinations / products between columns
    if columns is None:
        # Case A: column is not defined --> corr between all numeric columns
        combs = list(combinations(keys, 2))
    else:
        # Case B: column is specified
        if isinstance(columns[0], list):
            group1 = [e for e in columns[0] if e in keys]
            # A missing second group means "all the other columns". Read it
            # without appending to ``columns`` so that the list owned by the
            # caller is left untouched.
            second = columns[1] if len(columns) > 1 else None
            if isinstance(second, list) and len(second):
                # B1: [['a', 'b'], ['c', 'd']]
                group2 = [e for e in second if e in keys]
            else:
                # B2: [['a', 'b']], [['a', 'b'], None] or [['a', 'b'], 'all']
                group2 = [e for e in keys if e not in group1]
            combs = list(product(group1, group2))
        else:
            # Column is a simple list
            if len(columns) == 1:
                # Case B3: one-versus-all, e.g. ['a'] or 'a'
                # Check that this column exist
                if columns[0] not in keys:
                    msg = ('"%s" is not in data or is not numeric.' %
                           columns[0])
                    raise ValueError(msg)
                others = [e for e in keys if e != columns[0]]
                combs = list(product(columns, others))
            else:
                # Combinations between all specified columns ['a', 'b', 'c']
                # Make sure that we keep numeric columns
                columns = [c for c in columns if c in keys]
                if len(columns) == 1:
                    # If only one-column is left, equivalent to ['a']
                    others = [e for e in keys if e != columns[0]]
                    combs = list(product(columns, others))
                else:
                    # combinations between ['a', 'b', 'c']
                    combs = list(combinations(columns, 2))

    combs = np.array(combs)
    if len(combs) == 0:
        raise ValueError("No column combination found. Please make sure that "
                         "the specified columns exist in the dataframe, are "
                         "numeric, and contains at least two unique values.")

    # Initialize empty dataframe
    if multi_index:
        # Each element of a pair is itself a (level0, level1) tuple, so
        # rebuild the tuples from the last axis of the 3-D array.
        X = list(zip(combs[:, 0, 0], combs[:, 0, 1]))
        Y = list(zip(combs[:, 1, 0], combs[:, 1, 1]))
    else:
        X = combs[:, 0]
        Y = combs[:, 1]
    stats = pd.DataFrame({
        'X': X,
        'Y': Y,
        'method': method,
        'tail': tail
    },
                         index=range(len(combs)),
                         columns=[
                             'X', 'Y', 'method', 'tail', 'n', 'outliers', 'r',
                             'CI95%', 'r2', 'adj_r2', 'p-val', 'BF10', 'power'
                         ])

    # Now we check if covariates are present
    if covar is not None:
        assert isinstance(covar, (str, list)), 'covar must be list or string.'
        if isinstance(covar, str):
            covar = [covar]
        # Check that columns exist and are numeric
        assert all(c in keys for c in covar), 'covar not in data or not num.'
        # And we make sure that X or Y does not contain covar
        # (``axis`` is passed by keyword: the positional form is deprecated)
        stats = stats[~stats[['X', 'Y']].isin(covar).any(axis=1)]
        stats = stats.reset_index(drop=True)
        if stats.shape[0] == 0:
            raise ValueError("No column combination found. Please make sure "
                             "that the specified columns and covar exist in "
                             "the dataframe, are numeric, and contains at "
                             "least two unique values.")

    # Compute pairwise correlations and fill dataframe
    dvs = ['n', 'r', 'CI95%', 'r2', 'adj_r2', 'p-val', 'power']
    dvs_out = dvs + ['outliers']
    dvs_bf10 = dvs + ['BF10']
    for i in range(stats.shape[0]):
        col1, col2 = stats.loc[i, 'X'], stats.loc[i, 'Y']
        if covar is None:
            cor_st = corr(data[col1].values,
                          data[col2].values,
                          tail=tail,
                          method=method)
        else:
            cor_st = partial_corr(data=data,
                                  x=col1,
                                  y=col2,
                                  covar=covar,
                                  tail=tail,
                                  method=method)
        # The set of output columns depends on what the correlation
        # function returned: copy the optional ones only when present.
        cor_st_keys = cor_st.columns.tolist()
        if 'BF10' in cor_st_keys:
            stats.loc[i, dvs_bf10] = cor_st[dvs_bf10].values
        elif 'outliers' in cor_st_keys:
            stats.loc[i, dvs_out] = cor_st[dvs_out].values
        else:
            stats.loc[i, dvs] = cor_st[dvs].values

    # Force conversion to numeric
    stats = stats.astype({
        'r': float,
        'r2': float,
        'adj_r2': float,
        'n': int,
        'p-val': float,
        'outliers': float,
        'power': float
    })

    # Multiple comparisons (skipped when there is a single p-value)
    stats = stats.rename(columns={'p-val': 'p-unc'})
    padjust = None if stats['p-unc'].size <= 1 else padjust
    if padjust is not None:
        if padjust.lower() != 'none':
            reject, stats['p-corr'] = multicomp(stats['p-unc'].values,
                                                method=padjust)
            stats['p-adjust'] = padjust
    else:
        stats['p-corr'] = None
        stats['p-adjust'] = None

    # Standardize correlation coefficients (Fisher z-transformation)
    stats['z'] = np.round(np.arctanh(stats['r'].values), 3)

    col_order = [
        'X', 'Y', 'method', 'tail', 'n', 'outliers', 'r', 'CI95%', 'r2',
        'adj_r2', 'z', 'p-unc', 'p-corr', 'p-adjust', 'BF10', 'power'
    ]

    # Reorder columns and remove empty ones
    stats = stats.reindex(columns=col_order)
    stats = stats.dropna(how='all', axis=1)

    # Add covariates names if present
    if covar is not None:
        stats.insert(loc=3, column='covar', value=str(covar))

    if export_filename is not None:
        _export_table(stats, export_filename)
    return stats
Exemplo n.º 5
0
    def test_partial_corr(self):
        """Check partial_corr against reference values.

        The expected r and p-values were obtained with the R package ppcor
        (which is also used by JASP).
        """
        df = read_dataset('partial_corr')

        # Each case: (extra keyword arguments, row label of the output,
        # expected r at 7 decimals, expected p-value, p-value decimals).
        cases = [
            # ---------------- partial correlation ----------------
            # One covariate
            ({'covar': 'cv1'},
             'pearson', 0.5681692, 0.001303059, 9),
            # Two covariates
            ({'covar': ['cv1', 'cv2']},
             'pearson', 0.5344372, 0.003392904, 9),
            # Three covariates
            # in R: pcor.test(x=df$x, y=df$y, z=df[, c("cv1", "cv2", "cv3")])
            ({'covar': ['cv1', 'cv2', 'cv3']},
             'pearson', 0.4926007, 0.009044164, 9),
            # Spearman, three covariates
            ({'covar': ['cv1', 'cv2', 'cv3'], 'method': "spearman"},
             'spearman', 0.5209208, 0.005336187, 9),
            # -------------- semi-partial correlation --------------
            # One covariate
            ({'y_covar': 'cv1'},
             'pearson', 0.5670793, 0.001337718, 9),
            # Two covariates
            ({'y_covar': ['cv1', 'cv2']},
             'pearson', 0.5097489, 0.005589687, 9),
            # Three covariates
            # in R: spcor.test(x=df$x, y=df$y, z=df[, c("cv1", "cv2", "cv3")])
            ({'y_covar': ['cv1', 'cv2', 'cv3']},
             'pearson', 0.4212351, 0.02865483, 8),
            # Three covariates, removed from x instead of y
            ({'x_covar': ['cv1', 'cv2', 'cv3']},
             'pearson', 0.4631883, 0.01496857, 8),
            # Spearman, three covariates
            ({'y_covar': ['cv1', 'cv2', 'cv3'], 'method': "spearman"},
             'spearman', 0.4597143, 0.01584262, 8),
        ]
        for extra, row, r_ref, p_ref, p_digits in cases:
            pc = partial_corr(data=df, x='x', y='y', **extra)
            assert round(pc.at[row, 'r'], 7) == r_ref
            assert round(pc.at[row, 'p-val'], p_digits) == p_ref

        # ---------------- invalid calls ----------------
        rejected = [
            # TypeError: partial_corr() got an unexpected keyword
            # argument 'tail'
            (TypeError, {'covar': 'cv1', 'tail': 'error'}),
            (ValueError, {'covar': 'cv2', 'x_covar': 'cv1'}),
            (ValueError, {'x_covar': 'cv2', 'y_covar': 'cv1'}),
        ]
        for exc_type, extra in rejected:
            with pytest.raises(exc_type):
                partial_corr(data=df, x='x', y='y', **extra)

        # x must not also be listed as a covariate
        with pytest.raises(AssertionError) as excinfo:
            partial_corr(data=df, x='cv1', y='y', covar=['cv1', 'cv2'])
        assert str(excinfo.value) == "x and covar must be independent"
Exemplo n.º 6
0
    def test_partial_corr(self):
        """Check partial_corr against reference values.

        The expected r and p-values were obtained with the R package ppcor
        (which is also used by JASP).
        """
        df = read_dataset('partial_corr')

        def check_pearson(cases):
            # Each case: (extra keyword arguments, expected r at 7
            # decimals, expected p-value, number of p-value decimals).
            for extra, r_ref, p_ref, p_digits in cases:
                pc = partial_corr(data=df, x='x', y='y', **extra)
                assert round(pc.at['pearson', 'r'], 7) == r_ref
                assert round(pc.at['pearson', 'p-val'], p_digits) == p_ref

        # ---------------- partial correlation ----------------
        check_pearson([
            # One covariate
            ({'covar': 'cv1'}, 0.5681692, 0.001303059, 9),
            # Two covariates
            ({'covar': ['cv1', 'cv2']}, 0.5344372, 0.003392904, 9),
            # Three covariates
            # in R: pcor.test(x=df$x, y=df$y, z=df[, c("cv1", "cv2", "cv3")])
            ({'covar': ['cv1', 'cv2', 'cv3']}, 0.4926007, 0.009044164, 9),
        ])

        # Spearman is only smoke-tested: the result is slightly different
        # from the ppcor package (r = 0.5209208, p = 0.005336187). This is
        # thought to be because ppcor uses an inverse covariance matrix
        # instead of a residual approach:
        # https://github.com/cran/ppcor/blob/master/R/ppcor_v1.01.R
        # >>> cvx <- cov(x, method=method)
        # >>> icvx <- ginv(cvx)
        # >>> pcor <- -cov2cor(icvx)
        partial_corr(data=df, x='x', y='y', covar=['cv1', 'cv2', 'cv3'],
                     method="spearman")

        # The remaining methods only need to run without raising
        other_methods = ['kendall', 'bicor', 'skipped', 'percbend',
                         'shepherd']
        for method in other_methods:
            partial_corr(data=df, x='x', y='y', covar=['cv1'], method=method)

        # -------------- semi-partial correlation --------------
        check_pearson([
            # One covariate
            ({'y_covar': 'cv1'}, 0.5670793, 0.001337718, 9),
            # Two covariates
            ({'y_covar': ['cv1', 'cv2']}, 0.5097489, 0.005589687, 9),
            # Three covariates
            # in R: spcor.test(x=df$x, y=df$y, z=df[, c("cv1", "cv2", "cv3")])
            ({'y_covar': ['cv1', 'cv2', 'cv3']}, 0.4212351, 0.02865483, 8),
            # Three covariates, removed from x instead of y
            ({'x_covar': ['cv1', 'cv2', 'cv3']}, 0.4631883, 0.01496857, 8),
        ])

        # Spearman is only smoke-tested here too: slightly different from
        # the ppcor package (r = 0.4597143, p = 0.01584262).
        partial_corr(data=df, x='x', y='y', y_covar=['cv1', 'cv2', 'cv3'],
                     method="spearman")

        # ---------------- invalid calls ----------------
        with pytest.raises(ValueError):
            partial_corr(data=df, x='x', y='y', covar='cv2', x_covar='cv1')
        # x must not also be listed as a covariate
        with pytest.raises(AssertionError) as excinfo:
            partial_corr(data=df, x='cv1', y='y', covar=['cv1', 'cv2'])
        assert str(excinfo.value) == "x and covar must be independent"
Exemplo n.º 7
0
def pairwise_corr(data, columns=None, covar=None, tail='two-sided',
                  method='pearson', padjust='none', nan_policy='pairwise'):
    """Pairwise (partial) correlations between columns of a pandas dataframe.

    Parameters
    ----------
    data : :py:class:`pandas.DataFrame`
        DataFrame. Note that this function can also directly be used as a
        Pandas method, in which case this argument is no longer needed.
    columns : list or str
        Column names in data:

        * ``["a", "b", "c"]``: combination between columns a, b, and c.
        * ``["a"]``: product between a and all the other numeric columns.
        * ``[["a"], ["b", "c"]]``: product between ["a"] and ["b", "c"].
        * ``[["a", "d"], ["b", "c"]]``: product between ["a", "d"] and
          ["b", "c"].
        * ``[["a", "d"], None]``: product between ["a", "d"] and all other
          numeric columns in dataframe.

        If column is None, the function will return the pairwise correlation
        between the combination of all the numeric columns in data.
        See the examples section for more details on this.
    covar : None, string or list
        Covariate(s) for partial correlation. Must be one or more columns
        in data. Use a list if there are more than one covariate. If
        ``covar`` is not None, a partial correlation will be computed using
        :py:func:`pingouin.partial_corr` function.
    tail : string
        Specify whether to return ``'one-sided'`` or ``'two-sided'`` p-value.
        Note that the former are simply half the latter.
    method : string
        Correlation type:

        * ``'pearson'``: Pearson :math:`r` product-moment correlation
        * ``'spearman'``: Spearman :math:`\\rho` rank-order correlation
        * ``'kendall'``: Kendall's :math:`\\tau` correlation
          (for ordinal data)
        * ``'bicor'``: Biweight midcorrelation (robust)
        * ``'percbend'``: Percentage bend correlation (robust)
        * ``'shepherd'``: Shepherd's pi correlation (robust)
        * ``'skipped'``: Skipped correlation (robust)
    padjust : string
        Method used for testing and adjustment of pvalues.

        * ``'none'``: no correction
        * ``'bonf'``: one-step Bonferroni correction
        * ``'sidak'``: one-step Sidak correction
        * ``'holm'``: step-down method using Bonferroni adjustments
        * ``'fdr_bh'``: Benjamini/Hochberg FDR correction
        * ``'fdr_by'``: Benjamini/Yekutieli FDR correction
    nan_policy : string
        Can be ``'listwise'`` for listwise deletion of missing values
        (= complete-case analysis) or ``'pairwise'`` (default) for the more
        liberal pairwise deletion (= available-case analysis).

        .. versionadded:: 0.2.9

    Returns
    -------
    stats : :py:class:`pandas.DataFrame`

        * ``'X'``: Name(s) of first columns.
        * ``'Y'``: Name(s) of second columns.
        * ``'method'``: Correlation type.
        * ``'covar'``: List of specified covariate(s), only when covariates
          are passed.
        * ``'tail'``: Tail of the test.
        * ``'n'``: Sample size (after removal of missing values).
        * ``'r'``: Correlation coefficients.
        * ``'CI95'``: 95% parametric confidence intervals.
        * ``'r2'``: R-squared values.
        * ``'adj_r2'``: Adjusted R-squared values.
        * ``'z'``: Standardized correlation coefficients.
        * ``'p-unc'``: Uncorrected p-values.
        * ``'p-corr'``: Corrected p-values.
        * ``'p-adjust'``: P-values correction method.
        * ``'BF10'``: Bayes Factor of the alternative hypothesis
          (only for Pearson correlation)
        * ``'power'``: achieved power of the test (= 1 - type II error).

    Notes
    -----
    Please refer to the :py:func:`pingouin.corr()` function for a description
    of the different methods. NaN are automatically removed from the data using
    a pairwise deletion.

    This function is more flexible and gives a much more detailed
    output than the :py:func:`pandas.DataFrame.corr()` method (i.e. p-values,
    confidence interval, Bayes Factor...). This comes however at
    an increased computational cost. While this should not be discernible for
    dataframe with less than 10,000 rows and/or less than 20 columns, this
    function can be slow for very large dataset.

    A faster alternative to get the r-values and p-values in a matrix format is
    to use the :py:func:`pingouin.rcorr` function, which works directly as a
    :py:class:`pandas.DataFrame` method (see example below).

    This function also works with two-dimensional multi-index columns. In this
    case, columns must be list(s) of tuple(s). Please refer to this `example
    Jupyter notebook
    <https://github.com/raphaelvallat/pingouin/blob/master/notebooks/04_Correlations.ipynb>`_
    for more details.

    If ``covar`` is specified, this function will compute the pairwise partial
    correlation between the variables. If you are only interested in computing
    the partial correlation matrix (i.e. the raw pairwise partial correlation
    coefficient matrix, without the p-values, sample sizes, etc), a better
    alternative is to use the :py:func:`pingouin.pcorr` function (see
    example 7).

    Examples
    --------
    1. One-sided spearman correlation corrected for multiple comparisons

    >>> from pingouin import pairwise_corr, read_dataset
    >>> data = read_dataset('pairwise_corr').iloc[:, 1:]
    >>> pairwise_corr(data, method='spearman', tail='one-sided',
    ...               padjust='bonf')  # doctest: +SKIP

    2. Robust two-sided biweight midcorrelation with uncorrected p-values

    >>> pcor = pairwise_corr(data, columns=['Openness', 'Extraversion',
    ...                                     'Neuroticism'], method='bicor')

    3. One-versus-all pairwise correlations

    >>> pairwise_corr(data, columns=['Neuroticism'])  # doctest: +SKIP

    4. Pairwise correlations between two lists of columns (cartesian product)

    >>> columns = [['Neuroticism', 'Extraversion'], ['Openness']]
    >>> pairwise_corr(data, columns)   # doctest: +SKIP

    5. As a Pandas method

    >>> pcor = data.pairwise_corr(covar='Neuroticism', method='spearman')

    6. Pairwise partial correlation

    >>> pcor = pairwise_corr(data, covar='Neuroticism')  # One covariate
    >>> pcor = pairwise_corr(data, covar=['Neuroticism', 'Openness'])  # Two

    7. Pairwise partial correlation matrix using :py:func:`pingouin.pcorr`

    >>> data[['Neuroticism', 'Openness', 'Extraversion']].pcorr()
                  Neuroticism  Openness  Extraversion
    Neuroticism      1.000000  0.092097     -0.360421
    Openness         0.092097  1.000000      0.281312
    Extraversion    -0.360421  0.281312      1.000000

    8. Correlation matrix with p-values using :py:func:`pingouin.rcorr`

    >>> data[['Neuroticism', 'Openness', 'Extraversion']].rcorr()
                 Neuroticism Openness Extraversion
    Neuroticism            -                   ***
    Openness           -0.01        -          ***
    Extraversion       -0.35    0.267            -
    """
    from pingouin.correlation import corr, partial_corr

    # Check arguments
    assert tail in ['one-sided', 'two-sided']
    assert nan_policy in ['listwise', 'pairwise']

    # Keep only numeric columns
    data = data._get_numeric_data()
    # Remove columns with constant value and/or NaN
    data = data.loc[:, data.nunique(dropna=True) >= 2]
    # Extract columns names
    keys = data.columns.tolist()

    # First ensure that columns is a list
    if isinstance(columns, (str, tuple)):
        columns = [columns]

    def traverse(o, tree_types=(list, tuple)):
        """Lazily flatten arbitrarily nested containers.

        Any object that is an instance of ``tree_types`` is descended into
        recursively; every other object is yielded unchanged as a leaf.
        Adapted from https://stackoverflow.com/a/6340578

        Parameters
        ----------
        o : object
            The (possibly nested) object to flatten.
        tree_types : type or tuple of types
            Container types that are expanded instead of being yielded.

        Yields
        ------
        object
            The leaf items, in depth-first, left-to-right order.
        """
        # Leaf: anything that is not one of the container types
        if not isinstance(o, tree_types):
            yield o
            return
        # Container: delegate to the recursive generator of each child
        for child in o:
            yield from traverse(child, tree_types)

    # Check if columns index has multiple levels
    pdv = pd.__version__
    mindex = pd.MultiIndex if pdv.startswith('1') else pd.core.index.MultiIndex
    if isinstance(data.columns, mindex):
        multi_index = True
        if columns is not None:
            # Simple List with one element: [('L0', 'L1')]
            # Simple list with >= 2 elements: [('L0', 'L1'), ('L0', 'L2')]
            # Nested lists: [[('L0', 'L1')], ...] or [..., [('L0', 'L1')]]
            col_flatten = list(traverse(columns, tree_types=list))
            assert all(isinstance(c, (tuple, type(None))) for c in col_flatten)
    else:
        multi_index = False

    # Then define combinations / products between columns
    if columns is None:
        # Case A: column is not defined --> corr between all numeric columns
        combs = list(combinations(keys, 2))
    else:
        # Case B: column is specified
        if isinstance(columns[0], list):
            group1 = [e for e in columns[0] if e in keys]
            # Assert that column is two-dimensional
            if len(columns) == 1:
                columns.append(None)
            if isinstance(columns[1], list) and len(columns[1]):
                # B1: [['a', 'b'], ['c', 'd']]
                group2 = [e for e in columns[1] if e in keys]
            else:
                # B2: [['a', 'b']], [['a', 'b'], None] or [['a', 'b'], 'all']
                group2 = [e for e in keys if e not in group1]
            combs = list(product(group1, group2))
        else:
            # Column is a simple list
            if len(columns) == 1:
                # Case B3: one-versus-all, e.g. ['a'] or 'a'
                # Check that this column exist
                if columns[0] not in keys:
                    msg = ('"%s" is not in data or is not numeric.'
                           % columns[0])
                    raise ValueError(msg)
                others = [e for e in keys if e != columns[0]]
                combs = list(product(columns, others))
            else:
                # Combinations between all specified columns ['a', 'b', 'c']
                # Make sure that we keep numeric columns
                columns = [c for c in columns if c in keys]
                if len(columns) == 1:
                    # If only one-column is left, equivalent to ['a']
                    others = [e for e in keys if e != columns[0]]
                    combs = list(product(columns, others))
                else:
                    # combinations between ['a', 'b', 'c']
                    combs = list(combinations(columns, 2))

    combs = np.array(combs)
    if len(combs) == 0:
        raise ValueError("No column combination found. Please make sure that "
                         "the specified columns exist in the dataframe, are "
                         "numeric, and contains at least two unique values.")

    # Initialize empty dataframe
    if multi_index:
        X = list(zip(combs[:, 0, 0], combs[:, 0, 1]))
        Y = list(zip(combs[:, 1, 0], combs[:, 1, 1]))
    else:
        X = combs[:, 0]
        Y = combs[:, 1]
    stats = pd.DataFrame({'X': X, 'Y': Y, 'method': method, 'tail': tail},
                         index=range(len(combs)),
                         columns=['X', 'Y', 'method', 'tail', 'n', 'outliers',
                                  'r', 'CI95%', 'r2', 'adj_r2', 'p-val',
                                  'BF10', 'power'])

    # Now we check if covariates are present
    if covar is not None:
        assert isinstance(covar, (str, list)), 'covar must be list or string.'
        if isinstance(covar, str):
            covar = [covar]
        # Check that columns exist and are numeric
        assert all([c in keys for c in covar]), 'covar not in data or not num.'
        # And we make sure that X or Y does not contain covar
        stats = stats[~stats[['X', 'Y']].isin(covar).any(1)]
        stats = stats.reset_index(drop=True)
        if stats.shape[0] == 0:
            raise ValueError("No column combination found. Please make sure "
                             "that the specified columns and covar exist in "
                             "the dataframe, are numeric, and contains at "
                             "least two unique values.")

    # Listwise deletion of missing values
    if nan_policy == 'listwise':
        all_cols = np.unique(stats[['X', 'Y']].to_numpy()).tolist()
        if covar is not None:
            all_cols.extend(covar)
        data = data[all_cols].dropna()

    # Compute pairwise correlations and fill dataframe
    dvs = ['n', 'r', 'CI95%', 'r2', 'adj_r2', 'p-val', 'power']
    dvs_out = dvs + ['outliers']
    dvs_bf10 = dvs + ['BF10']
    for i in range(stats.shape[0]):
        col1, col2 = stats.at[i, 'X'], stats.at[i, 'Y']
        if covar is None:
            cor_st = corr(data[col1].to_numpy(), data[col2].to_numpy(),
                          tail=tail, method=method)
        else:
            cor_st = partial_corr(data=data, x=col1, y=col2, covar=covar,
                                  tail=tail, method=method)
        cor_st_keys = cor_st.columns.tolist()
        if 'BF10' in cor_st_keys:
            stats.loc[i, dvs_bf10] = cor_st[dvs_bf10].to_numpy()
        elif 'outliers' in cor_st_keys:
            stats.loc[i, dvs_out] = cor_st[dvs_out].to_numpy()
        else:
            stats.loc[i, dvs] = cor_st[dvs].to_numpy()

    # Force conversion to numeric
    stats = stats.astype({'r': float, 'r2': float, 'adj_r2': float,
                          'n': int, 'p-val': float, 'outliers': float,
                          'power': float})

    # Multiple comparisons
    stats = stats.rename(columns={'p-val': 'p-unc'})
    padjust = None if stats['p-unc'].size <= 1 else padjust
    if padjust is not None:
        if padjust.lower() != 'none':
            reject, stats['p-corr'] = multicomp(stats['p-unc'].to_numpy(),
                                                method=padjust)
            stats['p-adjust'] = padjust
    else:
        stats['p-corr'] = None
        stats['p-adjust'] = None

    # Standardize correlation coefficients (Fisher z-transformation)
    stats['z'] = np.arctanh(stats['r'].to_numpy())

    col_order = ['X', 'Y', 'method', 'tail', 'n', 'outliers', 'r', 'CI95%',
                 'r2', 'adj_r2', 'z', 'p-unc', 'p-corr', 'p-adjust',
                 'BF10', 'power']

    # Reorder columns and remove empty ones
    stats = stats.reindex(columns=col_order).dropna(how='all', axis=1)

    # Add covariates names if present
    if covar is not None:
        stats.insert(loc=3, column='covar', value=str(covar))

    return stats