Пример #1
0
def two_point(data, bins, random_state=None):
    """Two-point correlation function, using the Landy-Szalay estimator.

    A random background sample is built from ``data`` itself by shuffling
    every feature column except the last one, so the background has the
    same size and the same per-axis distribution as the data.

    Parameters
    ----------
    data : array_like
        input data, shape = [n_samples, n_features]. A 1D array is treated
        as n_samples points with a single feature.
    bins : array_like
        bin edges within which to compute the 2-point correlation.
        shape = Nbins + 1 (1D ndarray)
    random_state : integer, np.random.RandomState, or None (optional)
        passed to ``check_random_state``; seeds the shuffle that builds the
        background sample. The default (None) keeps the previous
        non-reproducible behaviour.

    Returns
    -------
    corr : ndarray
        the estimate of the correlation function within each bin,
        shape = Nbins. Bins with no random-random pairs are set to NaN.

    Raises
    ------
    ValueError
        if ``bins`` is not 1D or ``data`` is neither 1D nor 2D.
    """
    data = np.asarray(data)
    bins = np.asarray(bins)
    rng = check_random_state(random_state)

    if bins.ndim != 1:
        raise ValueError("bins must be a 1D array")

    if data.ndim == 1:
        data = data[:, np.newaxis]
    elif data.ndim != 2:
        raise ValueError("data should be 1D or 2D")

    n_features = data.shape[1]

    # Shuffle all but the last axis to get the background distribution.
    data_R = data.copy()
    for i in range(n_features - 1):
        rng.shuffle(data_R[:, i])

    # Ratio of random to data sample sizes; it is 1 here because data_R is a
    # shuffled copy, but it is kept so the estimator reads in its usual form.
    factor = len(data_R) * 1. / len(data)

    # Fast two-point correlation functions added in scikit-learn v. 0.14
    KDT_D = KDTree(data)    # tree over the actual points
    KDT_R = KDTree(data_R)  # tree over the randomized background

    # Cumulative pair counts: entry i counts pairs closer than bins[i].
    counts_DD = KDT_D.two_point_correlation(data, bins)
    counts_RR = KDT_R.two_point_correlation(data_R, bins)
    counts_DR = KDT_R.two_point_correlation(data, bins)

    # Differencing turns cumulative counts into counts per bin.
    DD = np.diff(counts_DD)
    DR = np.diff(counts_DR)
    RR = np.diff(counts_RR)

    # Avoid dividing by zero: temporarily set empty RR bins to 1 and mark
    # the corresponding results as NaN afterwards.
    RR_zero = (RR == 0)
    RR[RR_zero] = 1

    # Landy-Szalay estimator
    corr = (factor ** 2 * DD - 2 * factor * DR + RR) / RR
    corr[RR_zero] = np.nan

    return corr
Пример #2
0
def two_point(data, data_R, bins, method='landy-szalay', seed=1234,
              saverandom=False):
    """Two-point correlation function estimated from KD-tree pair counts.

    Parameters
    ----------
    data : array_like
        n_samples x n_features data array, e.g. x, y, z positions. A 1D
        array is treated as n_samples points with a single feature.
    data_R : array_like or None
        random comparison sample with the same number of features as
        ``data``. If None, a background sample is generated by shuffling
        every column of ``data`` except the last one.
    bins : array_like
        1D array of bin edges.
    method : string
        "standard" or "landy-szalay".
    seed : int
        seed of the ``np.random.RandomState`` used to shuffle ``data`` when
        ``data_R`` is None.
    saverandom : bool
        currently ignored; kept so existing callers keep working.

    Returns
    -------
    corr : ndarray
        two-point correlation for the chosen method, one value per bin.
        Bins with no random-random pairs are NaN. Errors are not returned;
        a bootstrap sampling can be run N times to evaluate errors.

    Raises
    ------
    ValueError
        if ``method`` is unknown, ``bins`` is not 1D, ``data`` is neither
        1D nor 2D, or ``data_R`` does not match the features of ``data``.
    """
    from sklearn.neighbors import KDTree
    data = np.asarray(data)
    bins = np.asarray(bins)
    rng = np.random.RandomState(seed)

    if method not in ['standard', 'landy-szalay']:
        raise ValueError("method must be 'standard' or 'landy-szalay'")

    if bins.ndim != 1:
        raise ValueError("bins must be a 1D array")

    if data.ndim == 1:
        data = data[:, np.newaxis]
    elif data.ndim != 2:
        raise ValueError("data should be 1D or 2D")

    n_features = data.shape[1]

    # shuffle all but one axis to get background distribution
    if data_R is None:
        data_R = data.copy()
        for i in range(n_features - 1):
            rng.shuffle(data_R[:, i])
    else:
        data_R = np.asarray(data_R)
        # Accept a 1D random sample the same way a 1D data sample is accepted.
        if data_R.ndim == 1:
            data_R = data_R[:, np.newaxis]
        if (data_R.ndim != 2) or (data_R.shape[-1] != n_features):
            raise ValueError('data_R must have same n_features as data')

    # Ratio of sample sizes, used to normalise the pair counts.
    factor = len(data_R) * 1. / len(data)

    KDT_D = KDTree(data)
    KDT_R = KDTree(data_R)
    print("Correlating Data, data size: {}".format(len(data)))
    counts_DD = KDT_D.two_point_correlation(data, bins)
    print('Correlating Random, random size: {}'.format(len(data_R)))
    counts_RR = KDT_R.two_point_correlation(data_R, bins)

    # Differencing turns the per-edge counts into counts per bin.
    DD = np.diff(counts_DD)
    RR = np.diff(counts_RR)

    # Avoid dividing by zero: set empty RR bins to 1 for the arithmetic and
    # overwrite the affected results with NaN afterwards.
    RR_zero = (RR == 0)
    RR[RR_zero] = 1

    if method == 'standard':
        corr = factor**2 * DD / RR - 1
    elif method == 'landy-szalay':
        print("Cross Correlating")
        counts_DR = KDT_R.two_point_correlation(data, bins)
        DR = np.diff(counts_DR)
        print("Evaluating correlation using {}".format(method))
        corr = (factor**2 * DD - 2 * factor * DR + RR) / RR
    corr[RR_zero] = np.nan
    return corr
Пример #3
0
def _cumulative_pair_counts(tree, points, bins, fast):
    """Count (tree point, query point) pairs within each radius in bins.

    With ``fast`` the tree's own ``two_point_correlation`` is used;
    otherwise one ``query_radius`` call per radius is summed.
    """
    if fast:
        return tree.two_point_correlation(points, bins)
    return np.array([np.sum(tree.query_radius(points, radius,
                                              count_only=True))
                     for radius in bins], dtype=float)


def two_point(data, bins, method='standard',
              data_R=None, random_state=None):
    """Two-point correlation function

    Parameters
    ----------
    data : array_like
        input data, shape = [n_samples, n_features]. A 1D array is treated
        as n_samples points with a single feature.
    bins : array_like
        bins within which to compute the 2-point correlation.
        shape = Nbins + 1
    method : string
        "standard" or "landy-szalay".
    data_R : array_like (optional)
        if specified, use this as the random comparison sample. Like
        ``data``, a 1D array is treated as a single-feature sample.
        If omitted, the background is built by shuffling every column of
        ``data`` except the last one.
    random_state : integer, np.random.RandomState, or None
        specify the random state to use for generating background

    Returns
    -------
    corr : ndarray
        the estimate of the correlation function within each bin
        shape = Nbins. Bins with no random-random pairs are NaN.

    Raises
    ------
    ValueError
        if ``method`` is unknown, ``bins`` is not 1D, ``data`` is neither
        1D nor 2D, or ``data_R`` does not match the features of ``data``.
    """
    data = np.asarray(data)
    bins = np.asarray(bins)
    rng = check_random_state(random_state)

    if method not in ['standard', 'landy-szalay']:
        raise ValueError("method must be 'standard' or 'landy-szalay'")

    if bins.ndim != 1:
        raise ValueError("bins must be a 1D array")

    if data.ndim == 1:
        data = data[:, np.newaxis]
    elif data.ndim != 2:
        raise ValueError("data should be 1D or 2D")

    n_features = data.shape[1]

    # shuffle all but one axis to get background distribution
    if data_R is None:
        data_R = data.copy()
        for i in range(n_features - 1):
            rng.shuffle(data_R[:, i])
    else:
        data_R = np.asarray(data_R)
        # Accept a 1D random sample the same way a 1D data sample is accepted.
        if data_R.ndim == 1:
            data_R = data_R[:, np.newaxis]
        if (data_R.ndim != 2) or (data_R.shape[-1] != n_features):
            raise ValueError('data_R must have same n_features as data')

    # Ratio of sample sizes, used to normalise the pair counts.
    factor = len(data_R) * 1. / len(data)

    if sklearn_has_two_point:
        # Fast two-point correlation functions added in scikit-learn v. 0.14
        tree_D = KDTree(data)
        tree_R = KDTree(data_R)
    else:
        warnings.warn("Version 0.3 of astroML will require scikit-learn "
                      "version 0.14 or higher for correlation function "
                      "calculations. Upgrade to sklearn 0.14+ now for much "
                      "faster correlation function calculations.")
        tree_D = BallTree(data)
        tree_R = BallTree(data_R)

    counts_DD = _cumulative_pair_counts(tree_D, data, bins,
                                        sklearn_has_two_point)
    counts_RR = _cumulative_pair_counts(tree_R, data_R, bins,
                                        sklearn_has_two_point)

    # Differencing turns the per-edge counts into counts per bin.
    DD = np.diff(counts_DD)
    RR = np.diff(counts_RR)

    # check for zero in the denominator: use 1 for the arithmetic and
    # overwrite the affected results with NaN afterwards
    RR_zero = (RR == 0)
    RR[RR_zero] = 1

    if method == 'standard':
        corr = factor ** 2 * DD / RR - 1
    elif method == 'landy-szalay':
        counts_DR = _cumulative_pair_counts(tree_R, data, bins,
                                            sklearn_has_two_point)
        DR = np.diff(counts_DR)

        corr = (factor ** 2 * DD - 2 * factor * DR + RR) / RR

    corr[RR_zero] = np.nan

    return corr
Пример #4
0
# One KD-tree per sample: two data sets (D, D1) and two random sets (R, R1).
KDT_D = KDTree(Data_D)
KDT_D1 = KDTree(Data_D1)
KDT_R = KDTree(Data_R)
KDT_R1 = KDTree(Data_R1)

Nbins = 30
# NOTE(review): np.arange(0, 30) yields 30 edges, i.e. 29 bins, which does not
# match Nbins = 30 -- confirm which of the two is intended.
bins = np.arange(0, 30)
print(bins)

# Pair counts per bin edge using the scikit-learn KD-tree.
# NOTE(review): the "RR" count queries the Data_R tree with Data_R1 points, so
# it is a cross count between the two random samples -- confirm intended.
counts_DD1 = KDT_D.two_point_correlation(Data_D, bins)
counts_RR1 = KDT_R.two_point_correlation(Data_R1, bins)

# Differencing turns the per-edge counts into counts per bin.
DD1 = np.diff(counts_DD1)
RR1 = np.diff(counts_RR1)

# Replace empty random bins by 1 so later divisions are safe; RR1_zero
# remembers which bins were empty.
RR1_zero = (RR1 == 0)
RR1[RR1_zero] = 1

# Cross counts: D against the R1 tree, and D1 against the R tree.
counts_DR1 = KDT_R1.two_point_correlation(Data_D, bins)
counts_D1R = KDT_R.two_point_correlation(Data_D1, bins)

DR1 = np.diff(counts_DR1)
D1R = np.diff(counts_D1R)
Пример #5
0
from pylab import plot,xlabel,ylabel,show
import time

NRAND = 10000  # number of random points

# Draw uniform randoms in a box 32 Mpc/h on a side, then compress the third
# axis from [0, 32) to [0, 4).
rng = np.random.RandomState(0)
ran = rng.random_sample((NRAND, 3))*32
ran[:, 2] = ran[:, 2]*4./32.

# logarithmically-spaced radii
r = 10.0**np.linspace(-1, 0.3, 10)
print(r)
tree = KDTree(ran)
xi = tree.two_point_correlation(ran, r)

# remove self-pairs (every point is at distance 0 from itself)
xi = xi - NRAND

# Convert cumulative counts Npairs(<r) into counts per bin. np.diff is
# evaluated on the untouched cumulative array before the assignment; an
# in-place running subtraction would reuse already-differenced entries and
# give wrong values from the third element on.
nbin = len(r)
xi[1:] = np.diff(xi)

print(xi)
print(r)



Пример #6
0
def two_point(data, bins, method='standard', data_R=None, random_state=None):
    """Estimate the two-point correlation function of a point sample.

    Parameters
    ----------
    data : array_like
        input points, shape = [n_samples, n_features]; a 1D array is
        interpreted as single-feature points
    bins : array_like
        1D array of bin edges, shape = Nbins + 1
    method : string
        estimator to use: "standard" or "landy-szalay"
    data_R : array_like (optional)
        random comparison sample (2D, same n_features as ``data``); when
        omitted it is generated by shuffling all but the last column of
        ``data``
    random_state : integer, np.random.RandomState, or None
        random state used when generating the background sample

    Returns
    -------
    corr : ndarray
        correlation estimate per bin, shape = Nbins; NaN where the
        random-random pair count of the bin is zero
    """
    data = np.asarray(data)
    bins = np.asarray(bins)
    rng = check_random_state(random_state)

    if method not in ('standard', 'landy-szalay'):
        raise ValueError("method must be 'standard' or 'landy-szalay'")

    if bins.ndim != 1:
        raise ValueError("bins must be a 1D array")

    if data.ndim not in (1, 2):
        raise ValueError("data should be 1D or 2D")
    if data.ndim == 1:
        data = data[:, np.newaxis]

    n_features = data.shape[1]

    if data_R is not None:
        data_R = np.asarray(data_R)
        if not (data_R.ndim == 2 and data_R.shape[-1] == n_features):
            raise ValueError('data_R must have same n_features as data')
    else:
        # Background sample: permute every column but the last.
        data_R = data.copy()
        for column in range(n_features - 1):
            rng.shuffle(data_R[:, column])

    factor = float(len(data_R)) / len(data)

    if sklearn_has_two_point:
        # scikit-learn >= 0.14: the KD-tree counts pairs for all radii at once
        tree_D = KDTree(data)
        tree_R = KDTree(data_R)

        def pair_counts(tree, points):
            return tree.two_point_correlation(points, bins)
    else:
        warnings.warn("Version 0.3 of astroML will require scikit-learn "
                      "version 0.14 or higher for correlation function "
                      "calculations. Upgrade to sklearn 0.14+ now for much "
                      "faster correlation function calculations.")

        tree_D = BallTree(data)
        tree_R = BallTree(data_R)

        def pair_counts(tree, points):
            # One radius query per bin edge, summed over all query points.
            totals = np.zeros(len(bins))
            for k, radius in enumerate(bins):
                totals[k] = np.sum(
                    tree.query_radius(points, radius, count_only=True))
            return totals

    DD = np.diff(pair_counts(tree_D, data))
    RR = np.diff(pair_counts(tree_R, data_R))

    # Guard the division: empty RR bins become 1 now and NaN in the result.
    empty_bins = (RR == 0)
    RR[empty_bins] = 1

    if method == 'landy-szalay':
        DR = np.diff(pair_counts(tree_R, data))
        corr = (factor**2 * DD - 2 * factor * DR + RR) / RR
    elif method == 'standard':
        corr = factor**2 * DD / RR - 1

    corr[empty_bins] = np.nan

    return corr
Пример #7
0
    # Ratio of random-sample size to data-sample size; it rescales the pair
    # counts in the estimator below.
    factor = len(Data_R1)*(1.0/len(Data_D))
    print factor

    # KD-trees over the data sample and over the random sample.
    KDT_D = KDTree(Data_D)
    KDT_R = KDTree(Data_R1)

    Nbins =30
    # NOTE(review): the three zero arrays below are overwritten by the
    # two_point_correlation results before they are ever read.
    counts_DD = np.zeros(Nbins+1)

    counts_RR = np.zeros(Nbins+1)
    counts_DR = np.zeros(Nbins+1)
    # NOTE(review): np.arange(0, 30) gives 30 edges (29 bins), not the
    # Nbins + 1 = 31 entries allocated above -- confirm which is intended.
    bins = np.arange(0, 30)
    print bins


    # Pair counts per bin edge for data-data and random-random.
    counts_DD = KDT_D.two_point_correlation(Data_D, bins)
    #print counts_DD
    counts_RR = KDT_R.two_point_correlation(Data_R1, bins)

    # Differences between consecutive edges, i.e. one value per bin.
    DD = np.diff(counts_DD)
    RR = np.diff(counts_RR)

    # Set empty random bins to 1 so the division below is defined; the mask
    # is used afterwards to turn those results into NaN.
    RR_zero = (RR == 0)
    RR[RR_zero] = 1

    # Cross counts: data points queried against the random-sample tree.
    counts_DR = KDT_R.two_point_correlation(Data_D, bins)

    DR = np.diff(counts_DR)
    # Same expression the sibling snippets label as the Landy-Szalay formula.
    corr = (factor**2 * DD - 2 * factor * DR + RR) / RR

    corr[RR_zero] = np.nan