def wincor(x, y, tr=.2):
    """
    Compute the winsorized correlation between `x` and `y`.
    This function also returns the winsorized covariance.

    :param x: Pandas Series
    Data for group one

    :param y: Pandas Series
    Data for group two

    :param tr: float
    Proportion to winsorize (default is .2)

    :return:
    Dictionary of results

    cor: float
    Winsorized correlation

    nval: int
    Number of observations

    sig: float
    p-value

    wcov: float
    Winsorized covariance
    """

    if type(x) is not np.ndarray:
        x, y = pandas_to_arrays([x, y])

    m1 = np.c_[x, y]  # cbind
    m1 = m1[~np.isnan(m1).any(axis=1)]
    nval = m1.shape[0]
    x = m1[:, 0]
    y = m1[:, 1]
    g = np.floor(tr * len(x))

    xvec = winsorize(x, limits=(tr, tr))
    yvec = winsorize(y, limits=(tr, tr))
    wcor = np.corrcoef(xvec, yvec)[0, 1]
    wcov = np.cov(xvec, yvec)[0, 1]
    test = wcor * np.sqrt((len(x) - 2) / (1. - wcor ** 2))
    sig = 2 * (1 - t.cdf(abs(test), len(x) - 2 * g - 2))

    res = {'cor': wcor, 'wcov': wcov, 'sig': sig, 'nval': nval}
    return res
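
# A minimal usage sketch for wincor (illustrative, not part of the package
# API). It assumes this module's imports (numpy as np, pandas as pd) are in
# scope; the data are synthetic and `_demo_wincor` is a hypothetical helper.
def _demo_wincor():
    rng = np.random.default_rng(42)
    x = pd.Series(rng.normal(size=40))
    y = pd.Series(.5 * x.to_numpy() + rng.normal(size=40))
    res = wincor(x, y, tr=.2)  # 20% winsorized correlation
    print(res['cor'], res['wcov'], res['sig'], res['nval'])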
def corb(corfun, x, y, alpha, nboot, *args, seed=False):
    """
    Compute a 1-alpha confidence interval for a correlation using the
    percentile bootstrap method. The function `corfun` is any function
    that returns a correlation coefficient. The functions pbcor and
    wincor follow this convention. When using Pearson's correlation,
    and when n<250, use lsfitci instead (not yet implemented).

    Note that arguments up to and including `args` are positional arguments

    :param corfun: function
    corfun is any function that returns a correlation coefficient

    :param x: Pandas Series
    Data for group one

    :param y: Pandas Series
    Data for group two

    :param alpha: float
    Alpha level

    :param nboot: int
    Number of bootstrap samples

    :param args: list/value
    List of arguments to corfun (e.g., .2)

    :param seed: bool
    Random seed for reproducible results. Default is `False`.

    :return:
    Dictionary of results

    ci: list
    Confidence interval

    cor: float
    Correlation estimate

    p_value: float
    p-value
    """

    x, y = pandas_to_arrays([x, y])
    m1 = np.c_[x, y]  # cbind
    m1 = m1[~np.isnan(m1).any(axis=1)]
    x = m1[:, 0]
    y = m1[:, 1]
    est = corfun(x, y, *args)['cor']

    if seed:
        np.random.seed(seed)

    data_inds = np.random.choice(len(x), size=(nboot, len(x)))
    bvec = np.array([corbsub(row_inds, x, y, corfun, *args)
                     for row_inds in data_inds])

    ihi = int(np.floor((1 - alpha / 2) * nboot + .5))
    ilow = int(np.floor((alpha / 2) * nboot + .5))
    bsort = sorted(bvec)
    corci = [bsort[ilow], bsort[ihi]]
    phat = sum(bvec < 0) / nboot
    sig = 2 * min(phat, 1 - phat)

    return {'ci': corci, 'p_value': sig, 'cor': est}
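
# A minimal sketch showing how corb composes with a correlation function
# such as wincor (synthetic data; `_demo_corb` is a hypothetical helper and
# assumes this module's imports are in scope). alpha, nboot, and wincor's
# trimming proportion are all passed positionally.
def _demo_corb():
    rng = np.random.default_rng(0)
    x = pd.Series(rng.normal(size=60))
    y = pd.Series(x.to_numpy() + rng.normal(size=60))
    res = corb(wincor, x, y, .05, 599, .2, seed=1)  # .2 is forwarded to wincor's tr
    print(res['cor'], res['ci'], res['p_value'])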
def pbcor(x, y, beta=.2):
    """
    Compute the percentage bend correlation between `x` and `y`.

    :param x: Pandas Series
    Data for group one

    :param y: Pandas Series
    Data for group two

    :param beta: float
    `0 < beta < .5`. Beta is analogous to trimming in other functions
    and related to the measure of dispersion used in the percentage
    bend calculation.

    :return:
    Dictionary of results

    cor: float
    Correlation

    nval: int
    Number of observations

    p_value: float
    p-value

    test: float
    Test statistic
    """

    if type(x) is not np.ndarray:
        x, y = pandas_to_arrays([x, y])

    if len(x) != len(y):
        raise Exception("The arrays do not have equal lengths")

    m1 = np.c_[x, y]  # cbind
    m1 = m1[~np.isnan(m1).any(axis=1)]
    nval = m1.shape[0]
    x = m1[:, 0]
    y = m1[:, 1]

    temp = np.sort(abs(x - np.median(x)))
    omhatx = temp[int(np.floor((1 - beta) * len(x))) - 1]
    temp = np.sort(abs(y - np.median(y)))
    omhaty = temp[int(np.floor((1 - beta) * len(y))) - 1]

    a = (x - pbos(x, beta)) / omhatx
    b = (y - pbos(y, beta)) / omhaty
    a = np.where(a <= -1, -1, a)
    a = np.where(a >= 1, 1, a)
    b = np.where(b <= -1, -1, b)
    b = np.where(b >= 1, 1, b)

    pbcor_result = sum(a * b) / np.sqrt(sum(a ** 2) * sum(b ** 2))
    test = pbcor_result * np.sqrt((len(x) - 2) / (1 - pbcor_result ** 2))
    sig = 2 * (1 - t.cdf(abs(test), len(x) - 2))

    res = {'cor': pbcor_result, 'test': test, 'p_value': sig, 'nval': nval}
    return res
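
# A minimal usage sketch for pbcor (synthetic data; `_demo_pbcor` is a
# hypothetical helper and assumes this module's imports are in scope).
def _demo_pbcor():
    rng = np.random.default_rng(2)
    x = pd.Series(rng.normal(size=50))
    y = pd.Series(.7 * x.to_numpy() + rng.normal(size=50))
    res = pbcor(x, y, beta=.2)
    print(res['cor'], res['test'], res['p_value'])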
def lindepbt(x, tr=.2, con=None, alpha=.05, nboot=599, dif=True, seed=False):
    """
    Multiple comparisons on trimmed means with FWE controlled with Rom's
    method. A bootstrap-t method is used.

    :param x: Pandas DataFrame
    Each column in the data represents a different group

    :param tr: float
    Proportion to trim (default is .2)

    :param con: array
    `con` is a J (number of groups) by d (number of contrasts)
    matrix containing the contrast coefficients of interest.
    All linear contrasts can be created automatically by using the function [con1way](J)
    (the result of which can be used for `con`). The default is `None` and in this
    case all linear contrasts are created automatically.

    :param alpha: float
    Alpha level. Default is .05.

    :param nboot: int
    Number of bootstrap samples (default is 599)

    :param dif: bool
    When `True`, use difference scores, otherwise use marginal distributions

    :param seed: bool
    Random seed for reproducible results (default is `False`)

    :return:
    Dictionary of results

    con: array
    Contrast matrix

    num_sig: int
    Number of statistically significant results

    psihat: DataFrame
    Difference score and CI for each contrast

    test: DataFrame
    Test statistic, p-value, critical value, and standard error for each contrast
    """

    called_directly = False
    if type(x) is pd.DataFrame:
        x = pandas_to_arrays(x)
        x = remove_nans_based_on_design(x, design_values=len(x),
                                        design_type='dependent_groups')
        x = np.r_[x].T
        called_directly = True

    from hypothesize.measuring_associations import wincor

    if seed:
        np.random.seed(seed)

    if con is None:
        con = con2way(1, x.shape[1])[1]  # all pairwise contrasts

    ncon = con.shape[1]

    x = x[~np.isnan(x).any(axis=1)]
    n = x.shape[0]
    J = x.shape[1]
    nval = x.shape[0]
    h1 = nval - 2 * np.floor(tr * nval)
    xbar = trim_mean(x, tr)

    if alpha == .05:
        dvec = [.05, .025, .0169, .0127, .0102,
                .00851, .0073, .00639, .00568, .00511]
        if ncon > 10:
            avec = .05 / np.arange(11, ncon + 1)
            dvec = np.append(dvec, avec)

    elif alpha == .01:
        dvec = [.01, .005, .00334, .00251, .00201,
                .00167, .00143, .00126, .00112, .00101]
        if ncon > 10:
            avec = .01 / np.arange(11, ncon + 1)
            dvec = np.append(dvec, avec)

    else:
        dvec = alpha / np.arange(1, ncon + 1)

    psihat = np.zeros([ncon, 4])
    test = np.zeros([ncon, 5])
    temp1 = np.array([])

    for d in range(ncon):
        psihat[d, 0] = d

        if not dif:
            psihat[d, 1] = np.sum(con[:, d] * xbar)
            sejk = 0

            for j in range(J):
                for k in range(J):
                    djk = (nval - 1) * wincor(x[:, j], x[:, k], tr)['wcov'] \
                          / (h1 * (h1 - 1))
                    sejk = sejk + con[j, d] * con[k, d] * djk

            sejk = np.sqrt(sejk)
            test[d, 0] = d
            test[d, 1] = np.sum(con[:, d] * xbar) / sejk
            test[d, 4] = sejk

            data = np.random.randint(n, size=(nboot, n))

            xcen = np.full([x.shape[0], x.shape[1]], np.nan)
            for j in range(J):
                xcen[:, j] = x[:, j] - trim_mean(x[:, j], tr)

            bvec = [lindep_sub(data_row, xcen, con[:, d], tr=tr)
                    for data_row in data]

            bsort = np.sort(np.abs(bvec))
            ic = round((1 - alpha) * nboot) - 1  # "- 1" adjusts for Python's zero-based indexing
            psihat[d, 2] = psihat[d, 1] - bsort[ic] * test[d, 4]
            psihat[d, 3] = psihat[d, 1] + bsort[ic] * test[d, 4]
            p_value = np.mean(np.abs(test[d, 1]) <= np.abs(bvec))
            temp1 = np.append(temp1, p_value)

        elif dif:
            for j in range(J):
                if j == 0:
                    dval = con[j, d] * x[:, j]
                elif j > 0:
                    dval = dval + con[j, d] * x[:, j]

            temp = trimcibt(dval, tr=tr, alpha=alpha, nboot=nboot, seed=seed)
            temp1 = np.append(temp1, temp['p_value'])
            test[d, 0] = d
            test[d, 1] = temp['test_stat']  # note: this assignment appears to be missing in the R version
            test[d, 4] = trimse(dval, tr=tr)
            psihat[d, 1] = trim_mean(dval, tr)
            psihat[d, 2] = temp['ci'][0]
            psihat[d, 3] = temp['ci'][1]

    test[:, 2] = temp1
    temp2 = (-temp1).argsort()
    zvec = dvec[:ncon]
    test[temp2, 3] = zvec
    num_sig = np.sum(test[:, 2] <= test[:, 3])

    if called_directly:
        test = pd.DataFrame(
            test, columns=["con_num", "test", "p_value", "p_crit", "se"])
        psihat = pd.DataFrame(
            psihat, columns=["con_num", "psihat", "ci_lower", "ci_upper"])

    return {'test': test, 'psihat': psihat, 'con': con, 'num_sig': num_sig}
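
# A minimal sketch for lindepbt on a dependent-groups DataFrame (synthetic
# data; `_demo_lindepbt` is a hypothetical helper and assumes this module's
# imports are in scope). With con=None, all pairwise contrasts are built.
def _demo_lindepbt():
    rng = np.random.default_rng(7)
    df = pd.DataFrame(rng.normal(size=(30, 3)), columns=['t1', 't2', 't3'])
    res = lindepbt(df, tr=.2, seed=1)
    print(res['test'])
    print(res['psihat'])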
def pb2gen(x, y, est, *args, alpha=.05, nboot=2000, seed=False):
    """
    Compute a bootstrap confidence interval for the difference between
    any two parameters corresponding to two independent groups.

    Note that arguments up to and including `args` are positional arguments

    :param x: Pandas Series
    Data for group one

    :param y: Pandas Series
    Data for group two

    :param est: function
    Measure of location (currently only `trim_mean` is supported)

    :param args: list/value
    Parameter(s) for measure of location (e.g., .2)

    :param alpha: float
    Alpha level (default is .05)

    :param nboot: int
    Number of bootstrap samples (default is 2000)

    :param seed: bool
    Random seed for reproducible results (default is `False`)

    :return:
    Dictionary of results

    ci: list
    Confidence interval

    est_1: float
    Estimated value (based on `est`) for group one

    est_2: float
    Estimated value (based on `est`) for group two

    est_dif: float
    Estimated difference between group one and two

    n1: int
    Number of observations in group one

    n2: int
    Number of observations in group two

    p_value: float
    p-value

    variance: float
    Variance of the bootstrap differences
    """

    x, y = pandas_to_arrays([x, y])
    x = x[~np.isnan(x)]
    y = y[~np.isnan(y)]

    if seed:
        np.random.seed(seed)

    datax = np.random.choice(x, size=(nboot, len(x)))
    datay = np.random.choice(y, size=(nboot, len(y)))

    bvecx = est(datax, *args, axis=1)
    bvecy = est(datay, *args, axis=1)
    bvec = np.sort(bvecx - bvecy)
    low = round((alpha / 2) * nboot)
    up = nboot - low - 2
    temp = np.sum(bvec < 0) / nboot + np.sum(bvec == 0) / (2 * nboot)
    sig_level = 2 * (min(temp, 1 - temp))
    se = np.var(bvec)

    results = {'est_1': est(x, *args),
               'est_2': est(y, *args),
               'est_dif': est(x, *args) - est(y, *args),
               'ci': [bvec[low], bvec[up]],
               'p_value': sig_level,
               'variance': se,
               'n1': len(x),
               'n2': len(y)}

    return results
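
# A minimal sketch for pb2gen with trim_mean as the measure of location
# (synthetic data; `_demo_pb2gen` is a hypothetical helper). The .2 after
# trim_mean is forwarded as its trimming proportion via *args.
def _demo_pb2gen():
    rng = np.random.default_rng(3)
    x = pd.Series(rng.normal(size=50))
    y = pd.Series(rng.normal(loc=.5, size=45))  # unequal n is fine here
    res = pb2gen(x, y, trim_mean, .2, alpha=.05, nboot=2000, seed=1)
    print(res['est_dif'], res['ci'], res['p_value'])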
def l2drmci(x, y, est, *args, pairwise_drop_na=True,
            alpha=.05, nboot=2000, seed=False):
    """
    Compute a bootstrap confidence interval for a measure of location
    associated with the distribution of x-y. That is, compare x and y
    by looking at all possible difference scores in random samples of
    `x` and `y`. `x` and `y` are possibly dependent.

    Note that arguments up to and including `args` are positional arguments

    :param x: Pandas Series
    Data for group one

    :param y: Pandas Series
    Data for group two

    :param est: function
    Measure of location (currently only `trim_mean` is supported)

    :param args: list/value
    Parameter(s) for measure of location (e.g., .2)

    :param pairwise_drop_na: bool
    If True, treat data as dependent and remove any row with missing data.
    If False, remove missing data for each group separately
    (cannot deal with unequal sample sizes)

    :param alpha: float
    Alpha level (default is .05)

    :param nboot: int
    Number of bootstrap samples (default is 2000)

    :param seed: bool
    Random seed for reproducible results (default is `False`)

    :return:
    Dictionary of results

    ci: list
    Confidence interval

    p_value: float
    p-value
    """

    x, y = pandas_to_arrays([x, y])

    if pairwise_drop_na:
        m1 = np.c_[x, y]  # cbind
        x = m1[~np.isnan(m1).any(axis=1)]

    else:
        x = x[~np.isnan(x)]
        y = y[~np.isnan(y)]

        if len(x) != len(y):
            raise Exception("With unequal sample sizes, you might consider wmwpb "
                            "(currently not implemented)")
        else:
            x = np.c_[x, y]  # cbind

    if seed:
        np.random.seed(seed)

    data = np.random.choice(x.shape[0], size=(nboot, len(x)))

    bvec = np.full(nboot, np.nan)
    for i in range(nboot):
        bvec[i] = loc2dif(x[data[i, :], 0], x[data[i, :], 1], est, *args,
                          drop_na=pairwise_drop_na)

    bvec = np.sort(bvec)
    low = int(np.round((alpha / 2) * nboot) + 1) - 1
    up = nboot - low - 2
    temp = np.sum(bvec < 0) / nboot + np.sum(bvec == 0) / (2 * nboot)
    sig_level = 2 * (np.min([temp, 1 - temp]))
    ci = [bvec[low], bvec[up]]

    return {'ci': ci, 'p_value': sig_level}
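
# A minimal sketch for l2drmci on paired data (synthetic; `_demo_l2drmci`
# is a hypothetical helper). With pairwise_drop_na=True, rows with missing
# values are removed before resampling.
def _demo_l2drmci():
    rng = np.random.default_rng(5)
    x = pd.Series(rng.normal(size=40))
    y = pd.Series(x.to_numpy() + rng.normal(scale=.5, size=40))
    res = l2drmci(x, y, trim_mean, .2, seed=1)
    print(res['ci'], res['p_value'])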
def yuenbt(x, y, tr=.2, alpha=.05, nboot=599, seed=False):
    """
    Compute a 1-alpha confidence interval for the difference between
    the trimmed means corresponding to two independent groups.
    The bootstrap-t method is used. During the bootstrapping,
    the absolute value of the test statistic is used
    (the "two-sided method").

    :param x: Pandas Series
    Data for group one

    :param y: Pandas Series
    Data for group two

    :param tr: float
    Proportion to trim (default is .2)

    :param alpha: float
    Alpha level (default is .05)

    :param nboot: int
    Number of bootstrap samples (default is 599)

    :param seed: bool
    Random seed for reproducible results. Default is `False`.

    :return:
    Dictionary of results

    ci: list
    Confidence interval

    est_dif: float
    Estimated difference between the trimmed means of group one and two

    est_x: float
    Estimated trimmed mean for group one

    est_y: float
    Estimated trimmed mean for group two

    p_value: float
    p-value

    test_stat: float
    Test statistic
    """

    x, y = pandas_to_arrays([x, y])

    if seed:
        np.random.seed(seed)

    ci = []
    x = x[~np.isnan(x)]
    y = y[~np.isnan(y)]

    xcen = x - trim_mean(x, tr)
    ycen = y - trim_mean(y, tr)

    test_stat = (trim_mean(x, tr) - trim_mean(y, tr)) / \
                np.sqrt(trimse(x, tr=tr) ** 2 + trimse(y, tr=tr) ** 2)

    datax = np.random.choice(xcen, size=(nboot, len(x)))
    datay = np.random.choice(ycen, size=(nboot, len(y)))

    # note: use tr throughout here; an earlier version hard-coded .2
    top = trim_mean(datax, tr, axis=1) - trim_mean(datay, tr, axis=1)
    botx = np.array([trimse(row, tr=tr) for row in datax])
    boty = np.array([trimse(row, tr=tr) for row in datay])
    tval = np.sort(np.abs(top / np.sqrt(botx ** 2 + boty ** 2)))
    icrit = int(np.floor((1 - alpha) * nboot + .5))

    se = np.sqrt(trimse(x, tr) ** 2 + trimse(y, tr) ** 2)
    ci.append(trim_mean(x, tr) - trim_mean(y, tr) - tval[icrit] * se)
    ci.append(trim_mean(x, tr) - trim_mean(y, tr) + tval[icrit] * se)
    p_value = np.sum(np.abs(test_stat) <= np.abs(tval)) / nboot
    est_x = trim_mean(x, tr)
    est_y = trim_mean(y, tr)
    est_dif = est_x - est_y

    results = {'ci': ci, 'test_stat': test_stat, 'p_value': p_value,
               'est_x': est_x, 'est_y': est_y, 'est_dif': est_dif}

    return results
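
# A minimal sketch for yuenbt comparing two independent groups (synthetic
# data; `_demo_yuenbt` is a hypothetical helper).
def _demo_yuenbt():
    rng = np.random.default_rng(11)
    x = pd.Series(rng.normal(size=40))
    y = pd.Series(rng.normal(loc=.8, size=50))
    res = yuenbt(x, y, tr=.2, seed=1)
    print(res['est_dif'], res['ci'], res['p_value'])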
def tmcppb(x, est, *args, con=None, bhop=False,
           alpha=.05, nboot=None, seed=False):
    """
    Multiple comparisons for J independent groups using trimmed means and
    the percentile bootstrap method. Rom's method is used to control the
    probability of one or more type I errors. For C > 10 hypotheses,
    or when the goal is to test at some level other than .05 and .01,
    Hochberg's method is used. Setting the argument `bhop` to `True`
    uses the Benjamini-Hochberg method instead.

    Note that arguments up to and including `args` are positional arguments

    :param x: Pandas DataFrame
    Each column represents a group of data

    :param est: function
    Measure of location (currently only `trim_mean` is supported)

    :param args: list/value
    Parameter(s) for measure of location (e.g., .2)

    :param con: array
    `con` is a J (number of columns) by d (number of contrasts)
    matrix containing the contrast coefficients of interest.
    All linear contrasts can be created automatically by using the function [con1way](J)
    (the result of which can be used for `con`). The default is `None` and in this
    case all linear contrasts are created automatically.

    :param bhop: bool
    If `True`, the Benjamini-Hochberg method is used to control FWE

    :param alpha: float
    Alpha level. Default is .05.

    :param nboot: int
    Number of bootstrap samples. The default is `None`, in which case
    nboot is chosen based on the number of groups (2000 when J <= 3,
    4000 when J <= 8, and 5000 otherwise).

    :param seed: bool
    Random seed for reproducible results. Default is `False`.

    :return:
    Dictionary of results

    con: array
    Contrast matrix

    num_sig: int
    Number of statistically significant results

    output: DataFrame
    Difference score, p-value, critical value, and CI for each contrast
    """

    x = pandas_to_arrays(x)
    x = remove_nans_based_on_design(x, len(x), 'independent_groups')
    J = len(x)

    mvec = [est(i, *args) for i in x]

    if con is None:
        con = con1way(J)

    ncon = con.shape[1]

    if not nboot:
        # smallest designs first; an earlier version used if/elif in the
        # opposite order, which made the J <= 3 branch unreachable
        if J <= 3:
            nboot = 2000
        elif J <= 8:
            nboot = 4000
        else:
            nboot = 5000

    if not bhop:

        if alpha == .05:
            dvec = [.05, .025, .0169, .0127, .0102,
                    .00851, .0073, .00639, .00568, .00511]
            if ncon > 10:
                avec = .05 / np.arange(11, ncon + 1)
                dvec = np.append(dvec, avec)

        elif alpha == .01:
            dvec = [.01, .005, .00334, .00251, .00201,
                    .00167, .00143, .00126, .00112, .00101]
            if ncon > 10:
                avec = .01 / np.arange(11, ncon + 1)
                dvec = np.append(dvec, avec)

        else:
            dvec = alpha / np.arange(1, ncon + 1)

    else:
        dvec = (ncon - np.arange(1, ncon + 1) + 1) * alpha / ncon

    if seed:
        np.random.seed(seed)

    bvec = np.full([J, nboot], np.nan)
    for i, j in enumerate(x):
        data = np.random.choice(j, size=(nboot, len(j)))
        bvec[i, :] = [est(row, *args) for row in data]

    bcon = con.T @ bvec
    tvec = con.T @ mvec
    test = np.full(ncon, np.nan)
    for d in range(ncon):
        tv = np.sum(bcon[d, :] == 0) / nboot
        test[d] = np.sum(bcon[d, :] > 0) / nboot + .5 * tv
        if test[d] > .5:
            test[d] = 1 - test[d]

    output = np.full([ncon, 6], np.nan)
    test = 2 * test
    temp2 = (-test).argsort()
    zvec = dvec[:ncon]
    output[temp2, 3] = zvec
    icl = int(np.round(dvec[-1] * nboot / 2) + 1) - 1
    icu = nboot - icl - 3

    for ic in range(ncon):
        output[ic, 0] = ic
        output[ic, 1] = tvec[ic]
        output[ic, 2] = test[ic]
        temp = np.sort(bcon[ic, :])
        output[ic, 4] = temp[icl]
        output[ic, 5] = temp[icu]

    num_sig = np.sum(output[:, 2] <= output[:, 3])

    cols = ["con_num", "psihat", "p_value", "p_crit", "ci_lower", "ci_upper"]
    output = pd.DataFrame(output, columns=cols)

    return {'output': output, 'con': con, 'num_sig': num_sig}
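
# A minimal sketch for tmcppb across three independent groups (synthetic
# data; `_demo_tmcppb` is a hypothetical helper). Leaving con=None builds
# all pairwise contrasts via con1way.
def _demo_tmcppb():
    rng = np.random.default_rng(9)
    df = pd.DataFrame({'g1': rng.normal(size=30),
                       'g2': rng.normal(loc=.5, size=30),
                       'g3': rng.normal(loc=1., size=30)})
    res = tmcppb(df, trim_mean, .2, seed=1)
    print(res['output'])
    print(res['num_sig'])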
def linconb(x, con, tr=.2, alpha=.05, nboot=599, seed=False):
    """
    Compute a 1-alpha confidence interval for a set of d linear contrasts
    involving trimmed means using the bootstrap-t method.
    Independent groups are assumed. CIs are adjusted to control FWE
    (p values are not adjusted).

    :param x: DataFrame
    Each column represents a group of data

    :param con: array
    `con` is a J (number of columns) by d (number of contrasts)
    matrix containing the contrast coefficients of interest.
    All linear contrasts can be created automatically by using the function [con1way](J)
    (the result of which can be used for `con`).

    :param tr: float
    Proportion to trim (default is .2)

    :param alpha: float
    Alpha level (default is .05)

    :param nboot: int
    Number of bootstrap samples (default is 599)

    :param seed: bool
    Random seed for reproducible results. Default is `False`.

    :return:
    Dictionary of results

    con: array
    Contrast matrix

    crit: float
    Critical value

    n: list
    Number of observations for each group

    psihat: DataFrame
    Difference score and CI for each contrast

    test: DataFrame
    Test statistic, standard error, and p-value for each contrast
    """

    x = pandas_to_arrays(x)
    J = len(x)
    x = np.asarray([j[~np.isnan(j)] for j in x])

    if con.shape[0] != len(x):
        raise Exception("The number of groups does not match "
                        "the number of contrast coefficients.")

    bvec = np.zeros([nboot, J, 2])

    if seed:
        np.random.seed(seed)

    nsam = [len(xi) for xi in x]
    for j in range(J):
        xcen = x[j] - trim_mean(x[j], tr)
        data = np.random.choice(xcen, size=(nboot, len(x[j])))

        for i, row in enumerate(data):
            bvec[i, j, :] = trimparts(row, tr)

    m1 = bvec[:, :, 0].T
    m2 = bvec[:, :, 1].T
    boot = np.zeros([con.shape[1], nboot])
    for d in range(con.shape[1]):
        top = np.asarray([trimpartt(row, con[:, d]) for row in m1.T])
        consq = con[:, d] ** 2
        bot = np.asarray([trimpartt(row, consq) for row in m2.T])
        boot[d, :] = np.abs(top) / np.sqrt(bot)

    testb = np.asarray([max(row) for row in boot.T])
    ic = int(np.floor((1 - alpha) * nboot) - 1)  # one less than R to account for zero indexing
    testb = np.sort(testb)
    psihat = np.zeros([con.shape[1], 4])
    test = np.zeros([con.shape[1], 4])

    for d in range(con.shape[1]):
        test[d, 0] = d
        psihat[d, 0] = d
        # lincon is called on a single column slice of the contrast matrix
        testit = lincon(x, np.array([con[:, d]]).T, tr, alpha)
        test[d, 1] = testit['test']['test'][0]
        pval = np.mean(abs(testit['test']['test'][0]) < boot[d, :])
        test[d, 3] = pval
        psihat[d, 2] = testit['psihat']['psihat'][0] - testb[ic] * testit['test']['se'][0]
        psihat[d, 3] = testit['psihat']['psihat'][0] + testb[ic] * testit['test']['se'][0]
        psihat[d, 1] = testit['psihat']['psihat'][0]
        test[d, 2] = testit['test']['se'][0]

    psihat_col_names = ['contrast_index', 'psihat', 'ci_low', 'ci_up']
    test_col_names = ['contrast_index', 'test', 'se', 'p_value']
    psihat = pd.DataFrame(psihat, columns=psihat_col_names)
    test = pd.DataFrame(test, columns=test_col_names)

    return {'n': nsam, 'psihat': psihat, 'test': test,
            'crit': testb[ic], 'con': con}
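
# A minimal sketch for linconb with an explicit contrast matrix built by
# con1way (synthetic data; `_demo_linconb` is a hypothetical helper).
def _demo_linconb():
    rng = np.random.default_rng(13)
    df = pd.DataFrame(rng.normal(size=(25, 3)), columns=['g1', 'g2', 'g3'])
    res = linconb(df, con1way(3), tr=.2, seed=1)
    print(res['test'])
    print(res['crit'])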
def ydbt(x, y, tr=.2, alpha=.05, nboot=599, side=True, seed=False):
    """
    Using the bootstrap-t method, compute a 1-alpha confidence interval
    for the difference between the marginal trimmed means of paired data.
    By default, 20% trimming is used with 599 bootstrap samples.

    :param x: Pandas Series
    Data for group one

    :param y: Pandas Series
    Data for group two

    :param tr: float
    Proportion to trim (default is .2)

    :param alpha: float
    Alpha level. Default is .05.

    :param nboot: int
    Number of bootstrap samples (default is 599)

    :param side: bool
    When `True`, the function returns a symmetric CI and a p value;
    otherwise the function returns an equal-tailed CI (no p value)

    :param seed: bool
    Random seed for reproducible results (default is `False`)

    :return:
    Dictionary of results

    ci: list
    Confidence interval

    dif: float
    Difference between group one and two

    p_value: float
    p-value
    """

    x = pandas_to_arrays([x, y])
    x = remove_nans_based_on_design(x, 2, 'dependent_groups')
    x, y = [x[0], x[1]]

    if seed:
        np.random.seed(seed)

    data = np.random.randint(len(x), size=(nboot, len(x)))

    xcen = x - trim_mean(x, tr)
    ycen = y - trim_mean(y, tr)

    bvec = [tsub(row, xcen, ycen, tr) for row in data]

    dotest = yuend(x, y, tr=tr)

    estse = dotest['se']
    p_value = np.nan
    dif = trim_mean(x, tr) - trim_mean(y, tr)
    ci = []

    if not side:
        print('p_value is only returned when side=True')
        ilow = round((alpha / 2) * nboot) - 1
        ihi = nboot - ilow - 2
        bsort = np.sort(bvec)
        ci.append(dif - bsort[ihi] * estse)
        ci.append(dif - bsort[ilow + 1] * estse)

    else:
        bsort = np.sort(np.abs(bvec))
        ic = round((1 - alpha) * nboot) - 1
        ci.append(dif - bsort[ic] * estse)
        ci.append(dif + bsort[ic] * estse)
        p_value = np.sum(np.abs(dotest['teststat']) <= np.abs(bvec)) / nboot

    return {'ci': ci, 'dif': dif, 'p_value': p_value}
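
# A minimal sketch for ydbt on paired data (synthetic; `_demo_ydbt` is a
# hypothetical helper). side=True returns a symmetric CI and a p value.
def _demo_ydbt():
    rng = np.random.default_rng(17)
    x = pd.Series(rng.normal(size=35))
    y = pd.Series(x.to_numpy() + rng.normal(scale=.6, size=35))
    res = ydbt(x, y, tr=.2, side=True, seed=1)
    print(res['dif'], res['ci'], res['p_value'])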