示例#1
0
def assign_kmeans_labels(pos, centers, verbose=False):
    """
    Defines 2D patches on the sky via spherical k-means

    Parameters
    ----------
    pos : np.ndarray
        positions of points in (RA, DEC)
    centers : int or array-like
        Number of centers to fit, or the (RA, DEC) coordinates of the
        centers with shape (ncen, 2)
    verbose : bool
        verbose flag to pass to **kmeans_sample**; it is not used when
        explicit centers are supplied

    Returns
    -------
    np.ndarray, np.ndarray
        * integer K-means label of the nearest center for each entry of pos
        * K-means centers

    Raises
    ------
    ValueError
        if explicit centers are passed which do not have shape (ncen, 2)
    """

    if np.iterable(centers):  # centers are explicit RA, DEC pairs
        # Accept lists / tuples of pairs as well as arrays.
        centers = np.asarray(centers)
        # Explicit check instead of an assert, which is stripped under -O.
        if centers.ndim != 2 or centers.shape[1] != 2:
            raise ValueError(
                "centers must have shape (ncen, 2), got %s" % (centers.shape,))
        km = krd.KMeans(centers)
    else:  # centers is the number of patches to fit
        # The initial fit is run on a sample of half the number of rows
        nsample = pos.shape[0] // 2
        km = krd.kmeans_sample(pos,
                               ncen=centers,
                               nsample=nsample,
                               verbose=verbose)
        if not km.converged:
            # keep iterating, this time on the full set of positions
            km.run(pos, maxiter=100)

    labels = km.find_nearest(pos).astype(int)
    return labels, km.centers
示例#2
0
    def _FindIndex(self, regions=None):
        if regions is None:
            try:
                regions = self.regions
            except:
                raise Exception('You must specify a regions file to use')

        self.index = []
        if type(regions) == str:
            regions = [regions] * len(self.jkargs)
        if len(regions) != len(self.jkargs):
            raise Exception(
                'Number or regions files (%i) does not match the number of jkargs (%i)'
                % (len(regions), len(self.jkargs)))

        for i in range(len(self.jkargs)):

            centers = _np.loadtxt(regions[i])
            self.njack = centers.shape[0]
            km = kmeans_radec.KMeans(centers)

            ra, dec = self._GetRaDec(i)
            rdi = _np.zeros((len(ra), 2))
            rdi[:, 0] = ra
            rdi[:, 1] = dec
            index = km.find_nearest(rdi)
            self.index.append(index)
示例#3
0
def find_centers(x_samp, ncen, RA_bounds, Dec_bounds, maxiter=100, n_tries=10):
    """
    Search for k-means centers such that no cluster ends up empty.

    Each attempt draws ``ncen`` starting centers uniformly inside the given
    RA / Dec intervals, runs k-means from them and accepts the result only if
    every one of the ``ncen`` clusters received at least one point.

    Parameters
    ----------
    x_samp : np.ndarray
        Sample to cluster. It is transposed before being handed to
        ``KMeans.run`` (presumably shape (2, N) with RA and Dec rows --
        confirm against the caller).
    ncen : int
        Number of centers.
    RA_bounds, Dec_bounds : sequence
        ``(low, high)`` limits used to draw the starting guesses.
    maxiter : int
        Passed to ``KMeans.run``.
    n_tries : int
        Maximum number of random restarts.

    Returns
    -------
    np.ndarray or None
        The centers of the first acceptable run, or None (after printing a
        message) when every attempt left at least one cluster empty.
    """
    for _ in range(n_tries):
        RA = RA_bounds[0] + (RA_bounds[1] -
                             RA_bounds[0]) * np.random.rand(ncen)
        Dec = Dec_bounds[0] + (Dec_bounds[1] -
                               Dec_bounds[0]) * np.random.rand(ncen)
        cen_guess = np.array([RA, Dec]).T
        km = kmrd.KMeans(cen_guess, verbose=0)
        km.run(X=x_samp.T, maxiter=maxiter)
        sys.stdout.flush()
        # Without minlength, bincount only reaches up to the largest label
        # present, so empty clusters with the highest indices would be missed.
        counts = np.bincount(km.labels, minlength=ncen)
        if not np.any(counts == 0):
            return km.centers
    print("Did not find a good set of centers")
    return None
示例#4
0
def GenerateRegions(jarrs, jras, jdecs, jfile, njack, gindex, jtype):
    """
    Build the k-means object that defines the jackknife regions.

    Parameters
    ----------
    jarrs, jras, jdecs : sequences
        ``jarrs[gindex][jras[gindex]]`` and ``jarrs[gindex][jdecs[gindex]]``
        provide the RA and Dec values used to generate the regions. Only
        read when ``jtype == 'generate'``.
    jfile : str or None
        File the centers are written to ('generate') or read from ('read').
        For 'generate' a value of None selects ``'JK-<njack>.txt'``.
    njack : int
        Number of regions to generate; ignored for 'read'.
    gindex : int
        Index of the input used to generate the regions.
    jtype : {'generate', 'read'}
        Whether to fit new centers or load existing ones.

    Returns
    -------
    list
        ``[km, jfile]``: the k-means object and the centers file name.

    Raises
    ------
    ValueError
        if ``jtype`` is neither 'generate' nor 'read'.
    RuntimeError
        if the k-means fit did not converge.
    """
    # Fail early with a clear message; otherwise an unknown jtype would only
    # surface as an unbound ``km`` at the return statement.
    if jtype not in ('generate', 'read'):
        raise ValueError(
            "jtype must be 'generate' or 'read', got %r" % (jtype,))

    if jtype == 'generate':
        rdi = np.zeros((len(jarrs[gindex]), 2))
        rdi[:, 0] = jarrs[gindex][jras[gindex]]
        rdi[:, 1] = jarrs[gindex][jdecs[gindex]]

        if jfile is None:
            jfile = 'JK-{0}.txt'.format(njack)
        km = kmeans_radec.kmeans_sample(rdi, njack, maxiter=200, tol=1.0e-5)

        if not km.converged:
            raise RuntimeError("k means did not converge")
        np.savetxt(jfile, km.centers)
    else:  # jtype == 'read'
        centers = np.loadtxt(jfile)
        km = kmeans_radec.KMeans(centers)

    return [km, jfile]
示例#5
0
def assign_jk_labels(ra, dec, centers):
    """
    Assigns a Jackknife (JK) label to the points based on the passed centers

    Parameters
    -----------
    ra : np.array
        Right Ascension of objects
    dec : np.array
        Declination of objects
    centers : np.array
        Coordinates of centers for K-means patches

    Returns
    --------
    list of int arrays, list of int arrays, int array
        * for each patch i, the indices of the points which are *IN* patch i,
        * for each patch i, the indices of the points which are *NOT IN*
          patch i,
        * JK label of every point

    Notes
    -----
    Both lists have one entry per row of ``centers`` (patches which received
    no point yield an empty "in" array). The return order is "in" first,
    "not in" second.
    """

    pos = np.vstack((ra, dec)).T

    km = krd.KMeans(centers)

    labels = km.find_nearest(pos).astype(int)

    # One label per passed center, whether or not any point landed in it
    sub_labels = np.arange(len(centers), dtype=int)

    # indices of the points outside patch i
    not_in_patch = [np.where(labels != ind)[0] for ind in sub_labels]

    # indices of the points inside patch i
    in_patch = [np.where(labels == ind)[0] for ind in sub_labels]

    return in_patch, not_in_patch, labels
示例#6
0
    def get_patches(self, centers, verbose=False):
        """
        Splits ``self.pos`` into JK patches with spherical k-means and stores
        the result on the instance.

        :param centers: either the number of patches to fit, or an array of
                        (RA, DEC) patch centers to use as they are

        :param verbose: forwarded to kmeans_sample when the patches are
                        fitted

        Sets ``km``, ``centers``, ``ncen``, ``labels``, ``sub_labels``,
        ``indexes``, ``non_indexes``, ``dsx_sub`` and ``dst_sub``.
        """

        if np.iterable(centers):
            # explicit centers: expected to be a 2D array of (RA, DEC) rows
            assert len(centers.shape) == 2
            self.km = krd.KMeans(centers)
            self.centers = centers
            self.ncen = len(centers)
        else:
            # only the number of patches is known: fit them on half the rows
            self.ncen = centers
            half = self.pos.shape[0] // 2
            self.km = krd.kmeans_sample(
                self.pos, ncen=self.ncen, nsample=half, verbose=verbose)
            if not self.km.converged:
                self.km.run(self.pos, maxiter=100)
            self.centers = self.km.centers

        nearest = self.km.find_nearest(self.pos)
        self.labels = nearest.astype(int)
        self.sub_labels = np.unique(self.labels)

        # for every label present: positions with a different label ...
        outside = []
        for patch_id in self.sub_labels:
            outside.append(np.where(self.labels != patch_id)[0])
        self.indexes = outside

        # ... and positions carrying exactly that label
        inside = []
        for patch_id in self.sub_labels:
            inside.append(np.where(self.labels == patch_id)[0])
        self.non_indexes = inside

        # zero-filled (nbin, ncen) containers
        sub_shape = (self.nbin, self.ncen)
        self.dsx_sub = np.zeros(shape=sub_shape)
        self.dst_sub = np.zeros(shape=sub_shape)
示例#7
0
def JackknifeOnSphere(jarrs,
                      jras,
                      jdecs,
                      jfunc,
                      jargs=[],
                      jkwargs={},
                      jtype='generate',
                      jfile=None,
                      njack=24,
                      generateonly=False,
                      gindex=0,
                      varonly=False,
                      save=None):
    """
    Delete-one-region jackknife of ``jfunc`` over k-means regions.

    Parameters
    ----------
    jarrs, jras, jdecs : array(s)
        Input tables and the keys giving their RA / Dec values; for input i
        the coordinates are read as ``jarrs[i][jras[i]]`` and
        ``jarrs[i][jdecs[i]]``. All three are passed through
        ``EnforceArray2D`` first.
    jfunc : callable
        Called as ``jfunc(arrays, *jargs, **jkwargs)`` and expected to return
        a pair ``(jackknifed, other)``. Only the first element gets a
        covariance.
    jargs, jkwargs : list, dict
        Extra arguments for ``jfunc``. They are only unpacked, never mutated.
    jtype : {'generate', 'read'}
        'generate' fits ``njack`` regions on input ``gindex`` and writes the
        centers to ``jfile``; 'read' loads the centers from ``jfile``.
    jfile : str or None
        Centers file; for 'generate' None selects ``'JK-<njack>.txt'``.
    njack : int
        Number of regions; replaced by the number of loaded centers for
        'read'.
    generateonly : bool
        With 'generate', return the centers file name right after writing it.
    gindex : int
        Index of the input used to generate the regions.
    varonly : bool
        Compute per-element variances instead of full covariance matrices.
    save : str or None
        If given, ``vec``, ``cov`` and ``other`` outputs are written below
        this directory with ``Write2Dir``.

    Returns
    -------
    list or str
        ``[full_j, cov_j, full_other, it_other]``: the full-sample results,
        one (co)variance per jackknifed output, the full-sample "other"
        outputs and their per-region values. With ``generateonly`` the
        centers file name is returned instead.

    Raises
    ------
    ValueError
        if ``jtype`` is neither 'generate' nor 'read'.
    RuntimeError
        if the k-means fit did not converge.
    """
    # Validate first: an unknown jtype used to leave ``km`` undefined and only
    # failed later with an unrelated-looking error.
    if jtype not in ('generate', 'read'):
        raise ValueError(
            "jtype must be 'generate' or 'read', got %r" % (jtype,))

    jarrs = EnforceArray2D(jarrs)
    jras = EnforceArray2D(jras)
    # The result was previously bound to an unused name, so jdecs was read
    # without being normalized like jras.
    jdecs = EnforceArray2D(jdecs)

    if jtype == 'generate':
        rdi = np.zeros((len(jarrs[gindex]), 2))
        rdi[:, 0] = jarrs[gindex][jras[gindex]]
        rdi[:, 1] = jarrs[gindex][jdecs[gindex]]

        if jfile is None:
            jfile = 'JK-{0}.txt'.format(njack)
        km = kmeans_radec.kmeans_sample(rdi, njack, maxiter=200, tol=1.0e-5)
        if not km.converged:
            raise RuntimeError("k means did not converge")
        np.savetxt(jfile, km.centers)
        if generateonly:
            return jfile
    else:  # jtype == 'read'
        centers = np.loadtxt(jfile)
        km = kmeans_radec.KMeans(centers)
        njack = len(centers)

    # Region index of every row of every input
    ind = []
    for i in range(len(jarrs)):
        rdi = np.zeros((len(jarrs[i]), 2))
        rdi[:, 0] = jarrs[i][jras[i]]
        rdi[:, 1] = jarrs[i][jdecs[i]]
        ind.append(km.find_nearest(rdi))

    full_j, full_other = jfunc(jarrs, *jargs, **jkwargs)
    full_j = EnforceArray2D(full_j)
    full_other = EnforceArray1D(full_other)

    # One accumulator per output, filled with one entry per region
    it_j = [[] for _ in range(len(full_j))]
    it_other = [[] for _ in range(len(full_other))]

    for j in range(njack):
        print('JK %i' % (j))

        # Drop region j from every input. ``~`` is the boolean complement;
        # unary minus on a boolean mask is rejected by current numpy.
        ja = []
        for i in range(len(jarrs)):
            cut = (ind[i] == j)
            ja.append(jarrs[i][~cut])

        i_j, i_other = jfunc(ja, *jargs, **jkwargs)
        i_j = EnforceArray2D(i_j)
        i_other = EnforceArray1D(i_other)

        for i in range(len(i_j)):
            it_j[i].append(np.copy(i_j[i]))
        for i in range(len(i_other)):
            it_other[i].append(np.copy(i_other[i]))

    for i in range(len(it_j)):
        it_j[i] = np.array(it_j[i])

    cov_j = []
    for k in range(len(full_j)):

        if varonly:
            # Spread of the per-region results around their own mean
            cov = np.power(np.std(it_j[k], axis=0),
                           2.0) * njack * float(njack - 1) / njack
            cov_j.append(cov)

        else:
            csize = len(full_j[k])
            cov = np.zeros((csize, csize))

            # Deviations are taken from the full-sample result; only the
            # upper triangle is computed and then mirrored.
            for i in range(csize):
                for j in range(i, csize):
                    cov[i, j] = np.sum(
                        (it_j[k][:, i] - full_j[k][i]) *
                        (it_j[k][:, j] - full_j[k][j])) * float(njack -
                                                                1) / njack

                    if i != j:
                        cov[j, i] = cov[i, j]
            cov_j.append(cov)

    if save is not None:

        vec = os.path.join(save, 'vec')
        cov = os.path.join(save, 'cov')
        other = os.path.join(save, 'other')

        Write2Dir(vec, full_j)
        Write2Dir(cov, cov_j)
        Write2Dir(other, full_other)

    return [full_j, cov_j, full_other, it_other]