def reference_sim_paircount(pos1, w1, redges, Nmu, boxsize, pos2=None, w2=None, los=2):
    """Count pairs in (r, mu) bins with kdcount, using flat-sky binning.

    Both point sets are built with the given ``boxsize``; when ``pos2`` is
    omitted the first set is correlated with itself.

    Returns the tuple ``(pair_counts, mean_centers[0], sum1)`` taken from the
    kdcount result, the first two passed through ``numpy.nan_to_num``.
    """
    first = correlate.points(pos1, boxsize=boxsize, weights=w1)
    # auto-correlation reuses the very same point set
    second = first if pos2 is None else correlate.points(pos2, boxsize=boxsize, weights=w2)

    binning = correlate.FlatSkyBinning(redges, Nmu, los=los, mu_min=0., absmu=True)
    counts = correlate.paircount(
        first, second, binning, np=0, usefast=False, compute_mean_coords=True
    )

    npairs = numpy.nan_to_num(counts.pair_counts)
    mean_r = numpy.nan_to_num(counts.mean_centers[0])
    return npairs, mean_r, counts.sum1
def reference_survey_paircount(pos1, w1, redges, Nmu, pos2=None, w2=None, los=2):
    """Count pairs in (r, mu) bins with kdcount for an observer at the origin.

    Point sets are built with ``boxsize=None``; when ``pos2`` is omitted the
    first set is correlated with itself.  ``los`` is accepted but not used in
    this function.

    Returns the tuple ``(pair_counts, mean_centers[0], sum1)`` taken from the
    kdcount result, the first two passed through ``numpy.nan_to_num``.
    """
    data1 = correlate.points(pos1, boxsize=None, weights=w1)
    data2 = data1 if pos2 is None else correlate.points(pos2, boxsize=None, weights=w2)

    rmu_bins = correlate.RmuBinning(redges, Nmu, observer=(0, 0, 0), mu_min=0., absmu=True)
    result = correlate.paircount(data1, data2, rmu_bins, np=0, compute_mean_coords=True)

    npairs = numpy.nan_to_num(result.pair_counts)
    ravg = numpy.nan_to_num(result.mean_centers[0])
    return npairs, ravg, result.sum1
def work(i):
    """Pair-count chunk ``i`` of each sample against the full samples.

    The function reads ``pool``, the position/chunk arrays, the full data
    sets, ``binning`` and the output arrays from the surrounding scope and
    writes its results into row ``i`` of those output arrays.

    Progress messages are printed through a single pre-formatted string so
    that the output is the same under Python 2 and Python 3.
    """
    with pool.critical:
        print('doing chunk %s %s' % (i, Nchunks))

    # per-chunk subsets, carrying the same `extra` labels as the full sets
    Qchunk = correlate.points(qpos[qchunks[i]], extra=Qfull.extra[qchunks[i]])
    Rchunk = correlate.points(rpos[rchunks[i]], extra=Rfull.extra[rchunks[i]])
    Fchunk = correlate.field(fpos[fchunks[i]], value=fdelta[fchunks[i]],
                             extra=objectid[fchunks[i]])

    # Q-Q
    DQDQ[i, ...] = correlate.paircount(Qchunk, Qfull, binning, np=0).fullsum1
    RQDQ[i, ...] = correlate.paircount(Rchunk, Qfull, binning, np=0).fullsum1
    RQRQ[i, ...] = correlate.paircount(Rchunk, Rfull, binning, np=0).fullsum1

    # Q-F
    DQDF = correlate.paircount(Qchunk, Ffull, binning, np=0)
    DQDFsum1[:, i, ...] = DQDF.fullsum1
    DQDFsum2[i, ...] = DQDF.fullsum2
    RQDF = correlate.paircount(Rchunk, Ffull, binning, np=0)
    RQDFsum1[:, i, ...] = RQDF.fullsum1
    RQDFsum2[i, ...] = RQDF.fullsum2

    # F-F
    DFDF = correlate.paircount(Fchunk, Ffull, binning, np=0)
    DFDFsum1[:, i, ...] = DFDF.fullsum1
    DFDFsum2[i, ...] = DFDF.fullsum2

    with pool.critical:
        print('done chunk %s %s %s' % (i, Nchunks, len(fchunks[i])))
def correlate_info(data1, data2, NBINS=NBINS, RMIN=1, RMAX=2, BOXSIZE=BOXSIZE, WRAP=WRAP):
    """Cross pair-count ``data1`` against ``data2`` in log-spaced radial bins.

    Parameters
    ----------
    data1, data2 : positions handed to ``correlate.points``
    NBINS : number of radial bins (``NBINS + 1`` log-spaced edges are built)
    RMIN, RMAX : first and last bin edge; ``RMAX=None`` falls back to BOXSIZE
    BOXSIZE, WRAP : when ``WRAP`` is true the point sets are built with
        ``boxsize=BOXSIZE``, otherwise with ``boxsize=None``

    Returns
    -------
    (DD, N) : ``sum1`` of the pair count and ``len(dataset1) - 1``, or
        ``(None, None)`` when ``data1`` is None.  The empty case used to
        return three values, which no caller unpacking the regular
        two-value result could consume.
    """
    if data1 is None:
        return None, None

    if RMAX is None:
        RMAX = BOXSIZE
    wrap_length = BOXSIZE if WRAP else None

    dataset1 = correlate.points(data1, boxsize=wrap_length)
    dataset2 = correlate.points(data2, boxsize=wrap_length)
    binning = correlate.RBinning(
        np.logspace(np.log10(RMIN), np.log10(RMAX), NBINS + 1))

    DD = correlate.paircount(dataset1, dataset2, binning, np=0).sum1
    N = len(dataset1) - 1
    return DD, N
def reference_survey_paircount(pos1, w1, rp_bins, pimax, pos2=None, w2=None, los=2):
    """Reference (rp, pi) pair counting via kdcount pair enumeration.

    Parameters
    ----------
    pos1, w1 : positions and weights of the first sample; ``w1=None`` means
        a weight of one per point
    rp_bins : edges of the ``rp`` bins
    pimax : upper edge of the ``pi`` bins; ``int(pimax) + 1`` linearly spaced
        edges between 0 and ``pimax`` are used
    pos2, w2 : second sample; when ``pos2`` is None the first sample (and its
        weights) is used for both sides
    los : accepted but not used in this function

    Returns
    -------
    npairs : number of pairs per (rp, pi) bin
    rpavg : summed ``rp`` per bin divided by ``npairs`` (NaN for empty bins)
    weightavg : summed ``w1 * w2`` per bin divided by ``npairs`` (NaN for
        empty bins)
    """
    from kdcount import correlate

    pi_bins = numpy.linspace(0, pimax, int(pimax) + 1)

    tree1 = correlate.points(pos1, boxsize=None, weights=w1).tree.root
    if pos2 is None:
        pos2 = pos1
        w2 = w1
    tree2 = correlate.points(pos2, boxsize=None, weights=w2).tree.root

    # Default to one scalar weight per point.  ``ones_like(pos)`` would give
    # one weight per coordinate, and the 2-D product below is rejected by
    # numpy.bincount.
    if w1 is None:
        w1 = numpy.ones(len(pos1))
    if w2 is None:
        w2 = numpy.ones(len(pos2))

    # find all pairs out to the largest separation any (rp, pi) bin can hold
    r, i, j = tree1.enum(tree2, (pimax**2 + rp_bins[-1]**2)**0.5)

    def compute_rp_pi(r, i, j):
        # split each separation into a component along the pair midpoint
        # direction (pi) and the remainder perpendicular to it (rp)
        r1 = pos1[i]
        r2 = pos2[j]
        center = 0.5 * (r1 + r2)
        dr = r1 - r2
        dot = numpy.einsum('ij, ij->i', dr, center)
        dot2 = dot ** 2
        center2 = numpy.einsum('ij, ij->i', center, center)
        los2 = dot2 / center2
        dr2 = numpy.einsum('ij, ij->i', dr, dr)
        x2 = numpy.abs(dr2 - los2)
        return x2 ** 0.5, los2 ** 0.5

    # compute rp, pi distances
    rp, pi = compute_rp_pi(r, i, j)

    # digitize; index 0 and the last index collect out-of-range values
    rp_dig = numpy.digitize(rp, rp_bins)
    pi_dig = numpy.digitize(pi, pi_bins)
    shape = (len(rp_bins) + 1, len(pi_bins) + 1)
    multi_index = numpy.ravel_multi_index([rp_dig, pi_dig], shape)

    # initialize the return arrays
    npairs = numpy.zeros(shape, dtype='i8')
    rpavg = numpy.zeros(shape, dtype='f8')
    weightavg = numpy.zeros(shape, dtype='f8')

    # summed rp values, with the out-of-range rows/columns trimmed
    rpavg.flat += numpy.bincount(multi_index, weights=rp, minlength=rpavg.size)
    rpavg = rpavg[1:-1, 1:-1]

    # count the pairs
    npairs.flat += numpy.bincount(multi_index, minlength=npairs.size)
    npairs = npairs[1:-1, 1:-1]

    # summed weight products
    weightavg.flat += numpy.bincount(multi_index, weights=w1[i] * w2[j],
                                     minlength=weightavg.size)
    weightavg = weightavg[1:-1, 1:-1]

    # empty bins yield NaN exactly as before; only the warning is silenced
    with numpy.errstate(invalid='ignore', divide='ignore'):
        return npairs, rpavg / npairs, weightavg / npairs
def reference_survey_paircount(pos1, w1, rp_bins, pimax, pos2=None, w2=None, los=2):
    """Reference (rp, pi) pair counting via kdcount pair enumeration.

    Parameters
    ----------
    pos1, w1 : positions and weights of the first sample; ``w1=None`` means
        a weight of one per point
    rp_bins : edges of the ``rp`` bins
    pimax : upper edge of the ``pi`` bins; ``int(pimax) + 1`` linearly spaced
        edges between 0 and ``pimax`` are used
    pos2, w2 : second sample; when ``pos2`` is None the first sample (and its
        weights) is used for both sides
    los : accepted but not used in this function

    Returns
    -------
    npairs : number of pairs per (rp, pi) bin
    rpavg : summed ``rp`` per bin divided by ``npairs``, passed through
        ``numpy.nan_to_num``
    wnpairs : summed ``w1 * w2`` per bin, passed through ``numpy.nan_to_num``
    """
    from kdcount import correlate

    pi_bins = numpy.linspace(0, pimax, int(pimax) + 1)

    tree1 = correlate.points(pos1, boxsize=None, weights=w1).tree.root
    if pos2 is None:
        pos2 = pos1
        w2 = w1
    tree2 = correlate.points(pos2, boxsize=None, weights=w2).tree.root

    # Default to one scalar weight per point.  ``ones_like(pos)`` would give
    # one weight per coordinate, and the 2-D product below is rejected by
    # numpy.bincount.
    if w1 is None:
        w1 = numpy.ones(len(pos1))
    if w2 is None:
        w2 = numpy.ones(len(pos2))

    # find all pairs out to the largest separation any (rp, pi) bin can hold
    r, i, j = tree1.enum(tree2, (pimax**2 + rp_bins[-1]**2)**0.5)

    def compute_rp_pi(r, i, j):
        # split each separation into a component along the pair midpoint
        # direction (pi) and the remainder perpendicular to it (rp)
        r1 = pos1[i]
        r2 = pos2[j]
        center = 0.5 * (r1 + r2)
        dr = r1 - r2
        dot = numpy.einsum('ij, ij->i', dr, center)
        dot2 = dot ** 2
        center2 = numpy.einsum('ij, ij->i', center, center)
        los2 = dot2 / center2
        dr2 = numpy.einsum('ij, ij->i', dr, dr)
        x2 = numpy.abs(dr2 - los2)
        return x2 ** 0.5, los2 ** 0.5

    # compute rp, pi distances
    rp, pi = compute_rp_pi(r, i, j)

    # digitize; index 0 and the last index collect out-of-range values
    rp_dig = numpy.digitize(rp, rp_bins)
    pi_dig = numpy.digitize(pi, pi_bins)
    shape = (len(rp_bins) + 1, len(pi_bins) + 1)
    multi_index = numpy.ravel_multi_index([rp_dig, pi_dig], shape)

    # initialize the return arrays
    npairs = numpy.zeros(shape, dtype='i8')
    rpavg = numpy.zeros(shape, dtype='f8')
    wnpairs = numpy.zeros(shape, dtype='f8')

    # summed rp values, with the out-of-range rows/columns trimmed
    rpavg.flat += numpy.bincount(multi_index, weights=rp, minlength=rpavg.size)
    rpavg = rpavg[1:-1, 1:-1]

    # count the pairs
    npairs.flat += numpy.bincount(multi_index, minlength=npairs.size)
    npairs = npairs[1:-1, 1:-1]

    # weighted pair counts
    wnpairs.flat += numpy.bincount(multi_index, weights=w1[i] * w2[j],
                                   minlength=wnpairs.size)
    wnpairs = wnpairs[1:-1, 1:-1]

    # empty bins divide 0/0; the NaN is zeroed below, so silence the warning
    with numpy.errstate(invalid='ignore', divide='ignore'):
        mean_rp = rpavg / npairs
    return npairs, numpy.nan_to_num(mean_rp), numpy.nan_to_num(wnpairs)
def test_weighted():
    """Unit weights must give the same ``sum1`` as no weights at all."""
    numpy.random.seed(1234)
    points = numpy.random.uniform(size=(1000, 3))

    unit_weights = numpy.ones(len(points))
    weighted = correlate.points(points, boxsize=1.0, weights=unit_weights)
    plain = correlate.points(points, boxsize=1.0)

    rbins = correlate.RBinning(numpy.linspace(0, 0.5, 10))
    counts_weighted = correlate.paircount(weighted, weighted, rbins, np=0)
    counts_plain = correlate.paircount(plain, plain, rbins, np=0)

    assert_equal(counts_weighted.sum1, counts_plain.sum1)
def test_weighted():
    """Compare a pair count with all-ones weights to the unweighted count."""
    numpy.random.seed(1234)
    sample = numpy.random.uniform(size=(1000, 3))

    with_weights = correlate.points(
        sample, boxsize=1.0, weights=numpy.ones(len(sample))
    )
    without_weights = correlate.points(sample, boxsize=1.0)
    edges = numpy.linspace(0, 0.5, 10)
    radial = correlate.RBinning(edges)

    weighted_result = correlate.paircount(with_weights, with_weights, radial, np=0)
    plain_result = correlate.paircount(without_weights, without_weights, radial, np=0)
    assert_equal(weighted_result.sum1, plain_result.sum1)
def reference_2pcf_smu(sedges, muedges, position1, weight1, position2=None, weight2=None, los='midpoint'):
    """Return kdcount's ``sum1`` pair counts binned in (s, mu).

    ``los='midpoint'`` selects ``RmuBinning`` with the observer at the
    origin; any other value is looked up in ``'xyz'`` and used as the axis of
    a ``FlatSkyBinning``.  The number of mu bins is ``len(muedges) - 1`` and
    only the first and last mu edge are forwarded.  When ``position2`` is
    omitted the first sample is correlated with itself.
    """
    first = correlate.points(position1, boxsize=None, weights=weight1)
    if position2 is not None:
        second = correlate.points(position2, boxsize=None, weights=weight2)
    else:
        second = first

    if los == 'midpoint':
        binning = correlate.RmuBinning(
            np.asarray(sedges),
            (len(muedges) - 1),
            observer=(0, 0, 0),
            mu_min=muedges[0],
            mu_max=muedges[-1],
            absmu=False,
        )
    else:
        binning = correlate.FlatSkyBinning(
            np.asarray(sedges),
            (len(muedges) - 1),
            los='xyz'.index(los),
            mu_min=muedges[0],
            mu_max=muedges[-1],
            absmu=False,
        )

    # note the argument order: second sample first
    counts = correlate.paircount(second, first, binning, np=0, usefast=False,
                                 compute_mean_coords=True)
    return counts.sum1
def reference_2pcf_s(sedges, position1, weight1, position2=None, weight2=None):
    """Return kdcount's ``sum1`` pair counts binned in separation.

    When ``position2`` is omitted the first sample is correlated with itself
    and the result is multiplied by one half; otherwise the cross counts are
    returned unscaled.
    """
    first = correlate.points(position1, boxsize=None, weights=weight1)
    if position2 is None:
        # auto-correlation: halve the counts
        second, scale = first, 1. / 2.
    else:
        second = correlate.points(position2, boxsize=None, weights=weight2)
        scale = 1.

    radial = correlate.RBinning(np.asarray(sedges))
    counts = correlate.paircount(first, second, radial, np=0, usefast=False,
                                 compute_mean_coords=True)
    return scale * counts.sum1
def main(A):
    """Compute the (R, mu) correlation of the QSO sample and save it.

    Data and random catalogues come from ``getqso(A)`` and ``getrandom(A)``.
    The DD, DR and RR pair counts are combined as
    ``(DD + r^2 RR - 2 r DR) / (r^2 RR)`` with ``r = len(data) / len(random)``
    and written, together with the bin centers and a ``CorrFunc`` built from
    them, to ``qsocorr-Rmu.npz`` inside ``A.datadir``.
    """
    qso = correlate.points(getqso(A))
    rand = correlate.points(getrandom(A))
    rmu = correlate.RmuBinning(160000, Nbins=40, Nmubins=48, observer=0)

    DD = correlate.paircount(qso, qso, rmu)
    DR = correlate.paircount(qso, rand, rmu)
    RR = correlate.paircount(rand, rand, rmu)

    ratio = 1.0 * len(qso) / len(rand)
    numerator = DD.sum1 + ratio ** 2 * RR.sum1 - 2 * ratio * DR.sum1
    xi = numerator / (ratio ** 2 * RR.sum1)

    func = CorrFunc(DD.centers[0], DD.centers[1], xi)
    target = os.path.join(A.datadir, "qsocorr-Rmu.npz")
    numpy.savez(target, center=DD.centers, xi=xi, corr=func)
def reference_2pcf_multi(sedges, position1, weight1, position2=None, weight2=None, ells=[0,1,2,3,4], los='midpoint'):
    """Return kdcount multipole pair sums, rescaled per multipole.

    ``los='midpoint'`` selects ``MultipoleBinning``; any other value is
    looked up in ``'xyz'`` and used as the axis of a
    ``FlatSkyMultipoleBinning``.  The transposed ``sum1`` is multiplied by
    ``(-1)**ell / (2*ell + 1)`` for each requested ``ell``.  When
    ``position2`` is omitted the first sample is correlated with itself.
    """
    first = correlate.points(position1, boxsize=None, weights=weight1)
    if position2 is not None:
        second = correlate.points(position2, boxsize=None, weights=weight2)
    else:
        second = first

    if los == 'midpoint':
        binning = correlate.MultipoleBinning(np.asarray(sedges), ells)
    else:
        binning = correlate.FlatSkyMultipoleBinning(
            np.asarray(sedges), ells, los='xyz'.index(los)
        )

    # note the argument order: second sample first
    counts = correlate.paircount(second, first, binning, np=0, usefast=False,
                                 compute_mean_coords=True)

    sign = (-1) ** np.asarray(ells)
    norm = sign * 1. / (2 * np.asarray(ells) + 1)
    return counts.sum1.T * norm
def reference_survey_paircount(pos1, w1, redges, Nmu, pos2=None, w2=None, los=2):
    """Reference (r, mu) pair count from kdcount with the observer at (0, 0, 0).

    No box size is given to the point sets.  Leaving ``pos2`` as None
    correlates the first sample with itself.  The ``los`` argument is not
    referenced by this function.

    The return value is ``(pair_counts, mean_centers[0], sum1)``; the first
    two entries go through ``numpy.nan_to_num``.
    """
    sample1 = correlate.points(pos1, boxsize=None, weights=w1)
    if pos2 is not None:
        sample2 = correlate.points(pos2, boxsize=None, weights=w2)
    else:
        sample2 = sample1

    binning = correlate.RmuBinning(
        redges, Nmu, observer=(0, 0, 0), mu_min=0., absmu=True
    )
    counted = correlate.paircount(
        sample1, sample2, binning, np=0, compute_mean_coords=True
    )

    return (
        numpy.nan_to_num(counted.pair_counts),
        numpy.nan_to_num(counted.mean_centers[0]),
        counted.sum1,
    )
def reference_paircount(pos1, w1, redges, boxsize, pos2=None, w2=None, los=2):
    """Count pairs in radial bins with kdcount.

    Both point sets are built with the given ``boxsize``; when ``pos2`` is
    omitted the first set is correlated with itself.  ``los`` is accepted but
    not used in this function.

    Returns ``(pair_counts, mean_centers, sum1)`` from the kdcount result,
    the first two passed through ``numpy.nan_to_num``.
    """
    # build the point sets
    first = correlate.points(pos1, boxsize=boxsize, weights=w1)
    second = first if pos2 is None else correlate.points(pos2, boxsize=boxsize, weights=w2)

    # run the pair count in radial bins
    radial = correlate.RBinning(redges)
    counts = correlate.paircount(first, second, radial, np=0, compute_mean_coords=True)

    npairs = numpy.nan_to_num(counts.pair_counts)
    mean_r = numpy.nan_to_num(counts.mean_centers)
    return npairs, mean_r, counts.sum1
def test_cross():
    """Cross pair counts must agree for every ``np`` / ``usefast`` setting."""
    numpy.random.seed(1234)
    pos1 = numpy.random.uniform(size=(10000, 2))
    pos2 = numpy.random.uniform(size=(10000, 2)) * 0.3

    dataset1 = correlate.points(pos1, boxsize=None)
    dataset2 = correlate.points(pos2, boxsize=None)
    binning = correlate.RBinning(numpy.linspace(0, 0.1, 10))

    # np=0 with usefast=False is the reference result
    reference = correlate.paircount(dataset1, dataset2, binning, np=0, usefast=False)

    for nproc, fast in ((0, True), (4, False), (4, True)):
        other = correlate.paircount(dataset1, dataset2, binning, np=nproc, usefast=fast)
        assert_equal(reference.sum1, other.sum1)
def correlate_info(data, NBINS=NBINS, RMIN=RMIN, RMAX=RMAX, BOXSIZE=BOXSIZE, WRAP=WRAP):
    """Auto pair-count ``data`` in log-spaced radial bins.

    ``RMAX=None`` falls back to ``BOXSIZE``.  The point set is built with
    ``boxsize=BOXSIZE`` when ``WRAP`` is true and ``boxsize=None`` otherwise.

    Returns ``(r, DD)``: the bin centers and the ``sum1`` of the pair count,
    or ``(None, None)`` when ``data`` is None.
    """
    if data is None:
        return None, None

    if RMAX is None:
        RMAX = BOXSIZE
    wrap_length = BOXSIZE if WRAP else None

    dataset = correlate.points(data, boxsize=wrap_length)
    edges = np.logspace(np.log10(RMIN), np.log10(RMAX), NBINS + 1)
    binning = correlate.RBinning(edges)

    pair_sums = correlate.paircount(dataset, dataset, binning, np=16).sum1
    return binning.centers, pair_sums
def test_unweighted():
    """Check both binnings against a brute-force count in a unit box."""
    numpy.random.seed(1234)
    pos = numpy.random.uniform(size=(1000, 3))

    # all pairwise separations, wrapped into [-0.5, 0.5] per coordinate
    delta = pos[:, None, :] - pos[None, :, :]
    delta[delta > 0.5] -= 1.0
    delta[delta < -0.5] += 1.0
    separation = numpy.einsum('ijk,ijk->ij', delta, delta) ** 0.5

    dataset = correlate.points(pos, boxsize=1.0)
    edges = numpy.linspace(0, 0.5, 10)
    # the python point-point counting
    slow_bins = correlate.RBinning(edges)
    # the C node-node counting
    fast_bins = correlate.FastRBinning(numpy.linspace(0, 0.5, 10))

    # first and last entries of the histogram hold out-of-range separations
    slots = slow_bins.edges.searchsorted(separation.flat, side='left')
    expected = numpy.bincount(slots)[1:-1]

    slow = correlate.paircount(dataset, dataset, slow_bins, np=0)
    assert_equal(slow.sum1, expected)

    fast = correlate.paircount(dataset, dataset, fast_bins, np=0)
    assert_equal(fast.sum1, expected)
def test_channels():
    """The first channel of an all-monopole binning equals the plain R count."""
    numpy.random.seed(1234)
    pos = numpy.random.uniform(size=(1000, 3))

    weighted = correlate.points(pos, boxsize=1.0, weights=numpy.ones(len(pos)))
    plain = correlate.points(pos, boxsize=1.0)

    flat_poles = correlate.FlatSkyMultipoleBinning(
        numpy.linspace(0, 0.5, 10), ells=[0, 0, 0], los=0
    )
    poles = correlate.MultipoleBinning(numpy.linspace(0, 0.5, 10), ells=[0, 0, 0])
    radial = correlate.RBinning(numpy.linspace(0, 0.5, 10))

    flat_result = correlate.paircount(weighted, weighted, flat_poles, np=0)
    pole_result = correlate.paircount(weighted, weighted, poles, np=0)
    reference = correlate.paircount(plain, plain, radial, np=0)

    assert_equal(flat_result.sum1[0], reference.sum1)
    assert_equal(pole_result.sum1[0], reference.sum1)
def test_unweighted():
    """Compare RBinning and FastRBinning counts with a direct O(N^2) count."""
    numpy.random.seed(1234)
    pos = numpy.random.uniform(size=(1000, 3))

    # brute force: coordinate differences folded back into [-0.5, 0.5]
    diff = pos[:, None, :] - pos[None, :, :]
    diff[diff > 0.5] -= 1.0
    diff[diff < -0.5] += 1.0
    dist = numpy.einsum('ijk,ijk->ij', diff, diff) ** 0.5

    dataset = correlate.points(pos, boxsize=1.0)

    # python point point counting
    binning = correlate.RBinning(numpy.linspace(0, 0.5, 10))
    # C node node counting
    binning1 = correlate.FastRBinning(numpy.linspace(0, 0.5, 10))

    which_bin = binning.edges.searchsorted(dist.flat, side='left')
    truth = numpy.bincount(which_bin)

    for bins in (binning, binning1):
        counted = correlate.paircount(dataset, dataset, bins, np=0)
        # drop the two out-of-range slots of the histogram
        assert_equal(counted.sum1, truth[1:-1])
def reference_paircount(pos1, w1, redges, boxsize, pos2=None, w2=None, los=2):
    """Reference radial pair count from kdcount.

    ``boxsize`` is forwarded to both point sets.  Leaving ``pos2`` as None
    correlates the first sample with itself.  The ``los`` argument is not
    referenced by this function.

    The return value is ``(pair_counts, mean_centers, sum1)``; the first two
    entries go through ``numpy.nan_to_num``.
    """
    sample1 = correlate.points(pos1, boxsize=boxsize, weights=w1)
    if pos2 is not None:
        sample2 = correlate.points(pos2, boxsize=boxsize, weights=w2)
    else:
        sample2 = sample1

    counted = correlate.paircount(
        sample1, sample2, correlate.RBinning(redges), np=0, compute_mean_coords=True
    )

    return (
        numpy.nan_to_num(counted.pair_counts),
        numpy.nan_to_num(counted.mean_centers),
        counted.sum1,
    )
def test_simple():
    """The fast and the default counting paths must give the same ``sum1``.

    Ten uniform random points in a unit box (``boxsize=1.0``) are counted
    against themselves with and without ``usefast=True``.
    """
    numpy.random.seed(1234)
    pos = numpy.random.uniform(size=(10, 3))
    dataset = correlate.points(pos, boxsize=1.0)

    # Nine bins over [0, 0.5], like the other tests in this file.  The
    # previous ``linspace(0.5, 10)`` produced 50 edges from 0.5 to 10, far
    # beyond the size of the unit box.
    binning = correlate.RBinning(numpy.linspace(0, 0.5, 10))

    r = correlate.paircount(dataset, dataset, binning, np=0)
    r1 = correlate.paircount(dataset, dataset, binning, usefast=True, np=0)
    assert_equal(r.sum1, r1.sum1)
def test_channels():
    """Each multipole binning with ells=[0, 0, 0] must match RBinning in channel 0."""
    numpy.random.seed(1234)
    sample = numpy.random.uniform(size=(1000, 3))

    datasetw = correlate.points(sample, boxsize=1.0, weights=numpy.ones(len(sample)))
    dataset = correlate.points(sample, boxsize=1.0)

    multipole_binnings = (
        correlate.FlatSkyMultipoleBinning(numpy.linspace(0, 0.5, 10), ells=[0, 0, 0], los=0),
        correlate.MultipoleBinning(numpy.linspace(0, 0.5, 10), ells=[0, 0, 0]),
    )
    rbinning = correlate.RBinning(numpy.linspace(0, 0.5, 10))

    # multipole counts first, then the radial reference
    multipole_counts = [
        correlate.paircount(datasetw, datasetw, mbins, np=0)
        for mbins in multipole_binnings
    ]
    radial_counts = correlate.paircount(dataset, dataset, rbinning, np=0)

    for counted in multipole_counts:
        assert_equal(counted.sum1[0], radial_counts.sum1)
def test_cross():
    """Cross counts must agree between RBinning / FastRBinning and np=0 / np=4."""
    numpy.random.seed(1234)
    pos1 = numpy.random.uniform(size=(10000, 2))
    pos2 = numpy.random.uniform(size=(10000, 2)) * 0.3

    dataset1 = correlate.points(pos1, boxsize=None)
    dataset2 = correlate.points(pos2, boxsize=None)

    # the python point-point counting
    slow_bins = correlate.RBinning(numpy.linspace(0, 0.5, 10))
    # the C node-node counting
    fast_bins = correlate.FastRBinning(numpy.linspace(0, 0.5, 10))

    reference = correlate.paircount(dataset1, dataset2, slow_bins, np=0)

    for bins, nproc in ((fast_bins, 0), (slow_bins, 4), (fast_bins, 4)):
        other = correlate.paircount(dataset1, dataset2, bins, np=nproc)
        assert_equal(reference.sum1, other.sum1)
def main(A):
    """Compute the radial QSO correlation function, print it and plot it.

    Data and random catalogues come from ``getqso(A)`` and ``getrandom(A)``.
    The DD, DR and RR pair counts are combined as
    ``(DD + r^2 RR - 2 r DR) / (r^2 RR)`` with ``r = len(data) / len(random)``.
    The (center, correlation) rows are written to ``stdout`` and a plot of
    ``(r / 1000)^2 * corr`` against ``r / 1000`` is saved as
    ``quasar-corr.svg`` inside ``A.datadir``.
    """
    data = correlate.points(getqso(A))
    random = correlate.points(getrandom(A))
    binning = correlate.RBinning(160000, 20)

    DD = correlate.paircount(data, data, binning)
    DR = correlate.paircount(data, random, binning)
    RR = correlate.paircount(random, random, binning)

    ratio = 1.0 * len(data) / len(random)
    corr = (DD.sum1 + ratio ** 2 * RR.sum1 - 2 * ratio * DR.sum1) / (ratio ** 2 * RR.sum1)

    # zip() is a lazy iterator on Python 3 and savetxt cannot turn it into
    # a 2-D array; materialize the rows first (a no-op copy on Python 2).
    numpy.savetxt(stdout, list(zip(DD.centers, corr)), fmt='%g')

    r = DD.centers

    from matplotlib.figure import Figure
    from matplotlib.backends.backend_agg import FigureCanvasAgg

    figure = Figure(figsize=(4, 3), dpi=200)
    ax = figure.add_axes([.1, .1, .85, .85])
    ax.plot(r / 1000, (r / 1000) ** 2 * corr, 'o ', label='LS')
    ax.legend()
    # the canvas object itself is not used again; it is still constructed
    # for the figure exactly as before
    canvas = FigureCanvasAgg(figure)
    figure.savefig(os.path.join(A.datadir, 'quasar-corr.svg'))
def test_simple():
    """RBinning and FastRBinning must give the same ``sum1`` on ten points."""
    numpy.random.seed(1234)
    points = numpy.random.uniform(size=(10, 3))
    dataset = correlate.points(points, boxsize=1.0)

    # the python point-point counting
    slow_bins = correlate.RBinning(numpy.linspace(0, 0.5, 10))
    # the C node-node counting
    fast_bins = correlate.FastRBinning(numpy.linspace(0, 0.5, 10))

    slow = correlate.paircount(dataset, dataset, slow_bins, np=0)
    fast = correlate.paircount(dataset, dataset, fast_bins, np=0)
    assert_equal(slow.sum1, fast.sum1)
def test_simple():
    """Count ten random points with both binning classes and compare ``sum1``."""
    numpy.random.seed(1234)
    sample = numpy.random.uniform(size=(10, 3))
    dataset = correlate.points(sample, boxsize=1.0)

    binnings = (
        # python point point counting
        correlate.RBinning(numpy.linspace(0, 0.5, 10)),
        # C node node counting
        correlate.FastRBinning(numpy.linspace(0, 0.5, 10)),
    )
    python_counts, c_counts = [
        correlate.paircount(dataset, dataset, bins, np=0) for bins in binnings
    ]
    assert_equal(python_counts.sum1, c_counts.sum1)
def test_paircount():
    """Bootstrap a pair count of four 1-D points and check the resampled sums."""
    numpy.random.seed(1234)

    # four one-dimensional points: 0., 1., 0., 1.
    coords = 1.0 * ((-numpy.arange(4).reshape(-1, 1)) % 2)
    data = correlate.points(coords)

    def bsfun(x):
        # bootstrap label of each point: its integer first coordinate
        return numpy.int32(x.pos[:, 0])

    policy = bootstrap.policy(bsfun, data)
    binning = correlate.RBinning(numpy.linspace(0, 100, 2, endpoint=True))

    def estimator(x, y):
        counted = correlate.paircount(x, y, binning, usefast=False, np=0)
        return counted.fullsum1

    result = policy.run(estimator, data, data)
    L, R = policy.resample(result, numpy.arange(2))

    assert_array_equal(L, (4, 4))
    assert_array_equal(R, (8, 8, 0))
def test_paircount():
    """Run the bootstrap policy over a tiny pair count and check its output."""
    numpy.random.seed(1234)

    # four one-dimensional points: 0., 1., 0., 1.
    raw = 1.0 * ((-numpy.arange(4).reshape(-1, 1)) % 2)
    data = correlate.points(raw)

    def bsfun(x):
        # bootstrap label of each point: its integer first coordinate
        return numpy.int32(x.pos[:, 0])

    policy = bootstrap.policy(bsfun, data)
    edges = numpy.linspace(0, 100, 2, endpoint=True)
    binning = correlate.RBinning(edges)

    def estimator(x, y):
        return correlate.paircount(x, y, binning, np=0).fullsum1

    result = policy.run(estimator, data, data)
    L, R = policy.resample(result, numpy.arange(2))

    assert_array_equal(L, (4, 4))
    assert_array_equal(R, (8, 8, 0))
def compute_brutal_corr(datasources, redges, Nmu=0, comm=None, subsample=1, los='z', poles=[]):
    r"""
    Compute the correlation function by direct pair summation, either as a
    function of separation (`R`) or as a function of separation and
    line-of-sight angle (`R`, `mu`)

    The estimator used to compute the correlation function is:

    .. math::

        \xi(r, \mu) = DD(r, \mu) / RR(r, \mu) - 1.

    where `DD` is the number of data-data pairs, and `RR` is the number of
    random-random pairs, which is determined solely by the binning used,
    assuming a constant number density

    Parameters
    ----------
    datasources : list of DataSource objects
        the list of data instances from which the 3D correlation will be
        computed; the box size is always taken from the first one
    redges : array_like
        the bin edges for the `R` variable
    Nmu : int, optional
        the number of desired `mu` bins, where `mu` is the cosine of the
        angle from the line-of-sight. Default is `0`, in which case the
        correlation function is binned as a function of `R` only
    comm : MPI.Communicator, optional
        the communicator used for the ``GridND`` domain decomposition and
        for all reductions. If not provided, ``MPI.COMM_WORLD`` is used
    subsample : int, optional
        downsample the input datasources by choosing 1 out of every `N`
        points. Default is `1` (no subsampling).
    los : str, {'x', 'y', 'z'}, optional
        the dimension to treat as the line-of-sight; default is 'z'.
    poles : list of int, optional
        integers specifying the multipoles to compute from the 2D
        correlation function

    Returns
    -------
    pc : :class:`kdcount.correlate.paircount`
        the pair counting instance
    xi : array_like
        the correlation function result; if `poles` supplied, the shape is
        `(len(redges)-1, len(poles))`, otherwise, the shape is either
        `(len(redges)-1, )` or `(len(redges)-1, Nmu)`
    RR : array_like
        the number of random-random pairs (used as normalization of the
        data-data pairs)

    Raises
    ------
    ValueError
        if `los` is not exactly one of 'x', 'y' or 'z'
    """
    from pmesh.domain import GridND
    from kdcount import correlate

    # some setup
    # exact membership: a substring test against "xyz" would also accept
    # '', 'xy', 'yz' and 'xyz' and silently map them to an axis
    if los not in ('x', 'y', 'z'):
        raise ValueError("`los` must be `x`, `y`, or `z`")
    los = "xyz".index(los)
    # a fresh array is built here, so the shared default list is never mutated
    poles = numpy.array(poles)
    Rmax = redges[-1]
    if comm is None:
        comm = MPI.COMM_WORLD

    # determine processor division for domain decomposition
    for Nx in range(int(comm.size**0.3333) + 1, 0, -1):
        if comm.size % Nx == 0:
            break
    else:
        Nx = 1
    for Ny in range(int(comm.size**0.5) + 1, 0, -1):
        if (comm.size // Nx) % Ny == 0:
            break
    else:
        Ny = 1
    Nz = comm.size // Nx // Ny
    Nproc = [Nx, Ny, Nz]

    # log some info
    if comm.rank == 0:
        logger.info('Nproc = %s' % str(Nproc))
        logger.info('Rmax = %g' % Rmax)

    # domain decomposition
    grid = [numpy.linspace(0, datasources[0].BoxSize[i], Nproc[i] + 1, endpoint=True)
            for i in range(3)]
    domain = GridND(grid, comm=comm)

    # read position for field #1
    with datasources[0].open() as stream:
        [[pos1]] = stream.read(['Position'], full=True)
    pos1 = pos1[comm.rank * subsample // comm.size::subsample]
    N1 = comm.allreduce(len(pos1))

    # read position for field #2
    if len(datasources) > 1:
        with datasources[1].open() as stream:
            [[pos2]] = stream.read(['Position'], full=True)
        pos2 = pos2[comm.rank * subsample // comm.size::subsample]
        N2 = comm.allreduce(len(pos2))
    else:
        pos2 = pos1
        N2 = N1

    # exchange field #1 positions
    layout = domain.decompose(pos1, smoothing=0)
    pos1 = layout.exchange(pos1)
    if comm.rank == 0:
        logger.info('exchange pos1')

    # exchange field #2 positions; for a large Rmax every rank simply
    # gathers the complete second field
    if Rmax > datasources[0].BoxSize[0] * 0.25:
        pos2 = numpy.concatenate(comm.allgather(pos2), axis=0)
    else:
        layout = domain.decompose(pos2, smoothing=Rmax)
        pos2 = layout.exchange(pos2)
    if comm.rank == 0:
        logger.info('exchange pos2')

    # initialize the trees to hold the field points
    tree1 = correlate.points(pos1, boxsize=datasources[0].BoxSize)
    tree2 = correlate.points(pos2, boxsize=datasources[0].BoxSize)

    # log the sizes of the trees
    logger.info('rank %d correlating %d x %d' % (comm.rank, len(tree1), len(tree2)))
    if comm.rank == 0:
        logger.info('all correlating %d x %d' % (N1, N2))

    # use multipole binning
    if len(poles):
        bins = correlate.FlatSkyMultipoleBinning(redges, poles, los, compute_mean_coords=True)
    # use (R, mu) binning
    elif Nmu > 0:
        bins = correlate.FlatSkyBinning(redges, Nmu, los, compute_mean_coords=True)
    # use R binning
    else:
        bins = correlate.RBinning(redges, compute_mean_coords=True)

    # do the pair counting
    # have to set usefast = False to get mean centers, or exception thrown
    pc = correlate.paircount(tree2, tree1, bins, np=0, usefast=False)
    pc.sum1[:] = comm.allreduce(pc.sum1)

    # get the mean bin values, reducing from all ranks
    pc.pair_counts[:] = comm.allreduce(pc.pair_counts)
    with numpy.errstate(invalid='ignore'):
        if bins.Ndim > 1:
            for i in range(bins.Ndim):
                pc.mean_centers[i][:] = comm.allreduce(pc.mean_centers_sum[i]) / pc.pair_counts
        else:
            pc.mean_centers[:] = comm.allreduce(pc.mean_centers_sum[0]) / pc.pair_counts

    # compute the random pairs from the fractional volume
    RR = 1. * N1 * N2 / datasources[0].BoxSize.prod()
    if Nmu > 0:
        dr3 = numpy.diff(pc.edges[0]**3)
        dmu = numpy.diff(pc.edges[1])
        RR *= 2. / 3. * numpy.pi * dr3[:, None] * dmu[None, :]
    else:
        RR *= 4. / 3. * numpy.pi * numpy.diff(pc.edges**3)

    # return the correlation and the pair count object
    xi = (1. * pc.sum1 / RR) - 1.0
    if len(poles):
        xi = xi.T  # makes ell the second axis
        xi[:, poles != 0] += 1.0  # only monopole gets the minus one

    return pc, xi, RR
def main():
    """Sum the pair counts inside every halo and write the resulting spectrum.

    Rank 0 opens the snapshot and halo-label files and broadcasts them.
    Particles are then distributed so that each halo label lives on a single
    rank (``label % comm.size``), while particles with label 0 are spread
    round-robin.  Every rank pair-counts each of its halos separately, the
    sums are reduced over all ranks, and rank 0 writes the output of
    ``corrfrompower`` to ``ns.output`` (``'-'`` means stdout).  For a real
    output file the correlation is also written to ``ns.output + '.xi'``.
    """
    comm = MPI.COMM_WORLD
    SNAP, LABEL = None, None
    if comm.rank == 0:
        SNAP = files.Snapshot(ns.snapfilename, files.TPMSnapshotFile)
        LABEL = files.Snapshot(ns.halolabel, files.HaloLabelFile)

    SNAP = comm.bcast(SNAP)
    LABEL = comm.bcast(LABEL)

    Ntot = sum(SNAP.npart)
    assert Ntot == sum(LABEL.npart)

    h = files.HaloFile(ns.halocatalogue)

    N = h.read_mass()

    # particles that are not counted in any entry N[1:]
    N0 = Ntot - sum(N[1:])
    # halos are assigned to ranks 0, 1, 2, 3 ...
    halorank = numpy.arange(len(N)) % comm.size
    # but non halos are special we will fix it later.
    halorank[0] = -1

    NonhaloStart = comm.rank * int(N0) // comm.size
    NonhaloEnd = (comm.rank + 1) * int(N0) // comm.size

    myNtotal = numpy.sum(N[halorank == comm.rank], dtype='i8') + (NonhaloEnd - NonhaloStart)

    print("Rank %d NonhaloStart %d NonhaloEnd %d myNtotal %d" %
          (comm.rank, NonhaloStart, NonhaloEnd, myNtotal))

    data = numpy.empty(myNtotal, dtype=[
        ('Position', ('f4', 3)),
        ('Label', ('i4')),
        ('Rank', ('i4')),
    ])

    allNtotal = comm.allgather(myNtotal)
    start = sum(allNtotal[:comm.rank])
    end = sum(allNtotal[:comm.rank + 1])
    data['Position'] = SNAP.read("Position", start, end)
    data['Label'] = LABEL.read("Label", start, end)
    data['Rank'] = halorank[data['Label']]

    # now assign ranks to nonhalo particles
    nonhalomask = (data['Label'] == 0)
    nonhalocount = comm.allgather(nonhalomask.sum())

    data['Rank'][nonhalomask] = (sum(nonhalocount[:comm.rank]) +
                                 numpy.arange(nonhalomask.sum())) % comm.size

    mpsort.sort(data, orderby='Rank')

    # order the local particles by label so each label is one contiguous run
    arg = data['Label'].argsort()
    data = data[arg]

    ul = numpy.unique(data['Label'])

    bins = correlate.RBinning(40. / ns.boxsize, Nbins=ns.Nmesh)
    sum1 = numpy.zeros(len(bins.centers))

    for l in ul:
        if l == 0:
            continue
        start = data['Label'].searchsorted(l, side='left')
        end = data['Label'].searchsorted(l, side='right')
        pos = data['Position'][start:end]
        dataset = correlate.points(pos, boxsize=1.0)
        result = correlate.paircount(dataset, dataset, bins, np=0)
        sum1 += result.sum1
        if l % 1000 == 0:
            # parenthesized single argument: same output on Python 2 and 3
            print(l)

    sum1 = comm.allreduce(sum1, MPI.SUM)

    Ntot = sum(SNAP.npart)
    RR = 4. / 3 * numpy.pi * numpy.diff(bins.edges**3) * (1.0 * Ntot * Ntot)

    k = numpy.arange(ns.Nmesh // 2) * 2 * numpy.pi / ns.boxsize
    # asymptotically zero at r. The mean doesn't matter as
    # we don't use zero k mode anyways.
    k, p = corrfrompower(bins.centers * ns.boxsize, sum1 / RR, R=k)

    # inverse FT factor
    p *= (2 * numpy.pi)**3

    if comm.rank == 0:
        if ns.output != '-':
            ff = open(ns.output, 'w')
            ff2 = open(ns.output + '.xi', 'w')
            with ff2:
                # zip() is a lazy iterator on Python 3 and savetxt cannot
                # turn it into a 2-D array; materialize the rows first
                numpy.savetxt(ff2, list(zip(bins.centers, sum1 / RR - 1.0)))
        else:
            ff = stdout
        with ff:
            numpy.savetxt(ff, list(zip(k, p)))
def compute_brutal_corr(datasources, redges, Nmu=0, comm=None, subsample=1, los='z', poles=[]):
    r"""
    Compute the correlation function by direct pair summation, either as a
    function of separation (`R`) or as a function of separation and
    line-of-sight angle (`R`, `mu`)

    The estimator used to compute the correlation function is:

    .. math::

        \xi(r, \mu) = DD(r, \mu) / RR(r, \mu) - 1.

    where `DD` is the number of data-data pairs, and `RR` is the number of
    random-random pairs, which is determined solely by the binning used,
    assuming a constant number density

    Parameters
    ----------
    datasources : list of DataSource objects
        the list of data instances from which the 3D correlation will be
        computed; the box size is always taken from the first one
    redges : array_like
        the bin edges for the `R` variable
    Nmu : int, optional
        the number of desired `mu` bins, where `mu` is the cosine of the
        angle from the line-of-sight. Default is `0`, in which case the
        correlation function is binned as a function of `R` only
    comm : MPI.Communicator, optional
        the communicator used for the ``GridND`` domain decomposition and
        for all reductions. If not provided, ``MPI.COMM_WORLD`` is used
    subsample : int, optional
        downsample the input datasources by choosing 1 out of every `N`
        points. Default is `1` (no subsampling).
    los : str, {'x', 'y', 'z'}, optional
        the dimension to treat as the line-of-sight; default is 'z'.
    poles : list of int, optional
        integers specifying the multipoles to compute from the 2D
        correlation function

    Returns
    -------
    pc : :class:`kdcount.correlate.paircount`
        the pair counting instance
    xi : array_like
        the correlation function result; if `poles` supplied, the shape is
        `(len(redges)-1, len(poles))`, otherwise, the shape is either
        `(len(redges)-1, )` or `(len(redges)-1, Nmu)`
    RR : array_like
        the number of random-random pairs (used as normalization of the
        data-data pairs)

    Raises
    ------
    ValueError
        if `los` is not exactly one of 'x', 'y' or 'z'
    """
    from pmesh.domain import GridND
    from kdcount import correlate

    # some setup
    # exact membership: a substring test against "xyz" would also accept
    # '', 'xy', 'yz' and 'xyz' and silently map them to an axis
    if los not in ('x', 'y', 'z'):
        raise ValueError("`los` must be `x`, `y`, or `z`")
    los = "xyz".index(los)
    # a fresh array is built here, so the shared default list is never mutated
    poles = numpy.array(poles)
    Rmax = redges[-1]
    if comm is None:
        comm = MPI.COMM_WORLD

    # determine processor division for domain decomposition
    for Nx in range(int(comm.size**0.3333) + 1, 0, -1):
        if comm.size % Nx == 0:
            break
    else:
        Nx = 1
    for Ny in range(int(comm.size**0.5) + 1, 0, -1):
        if (comm.size // Nx) % Ny == 0:
            break
    else:
        Ny = 1
    Nz = comm.size // Nx // Ny
    Nproc = [Nx, Ny, Nz]

    # log some info
    if comm.rank == 0:
        logger.info('Nproc = %s' % str(Nproc))
        logger.info('Rmax = %g' % Rmax)

    # domain decomposition
    grid = [
        numpy.linspace(0, datasources[0].BoxSize[i], Nproc[i] + 1, endpoint=True)
        for i in range(3)
    ]
    domain = GridND(grid, comm=comm)

    # read position for field #1
    with datasources[0].open() as stream:
        [[pos1]] = stream.read(['Position'], full=True)
    pos1 = pos1[comm.rank * subsample // comm.size::subsample]
    N1 = comm.allreduce(len(pos1))

    # read position for field #2
    if len(datasources) > 1:
        with datasources[1].open() as stream:
            [[pos2]] = stream.read(['Position'], full=True)
        pos2 = pos2[comm.rank * subsample // comm.size::subsample]
        N2 = comm.allreduce(len(pos2))
    else:
        pos2 = pos1
        N2 = N1

    # exchange field #1 positions
    layout = domain.decompose(pos1, smoothing=0)
    pos1 = layout.exchange(pos1)
    if comm.rank == 0:
        logger.info('exchange pos1')

    # exchange field #2 positions; for a large Rmax every rank simply
    # gathers the complete second field
    if Rmax > datasources[0].BoxSize[0] * 0.25:
        pos2 = numpy.concatenate(comm.allgather(pos2), axis=0)
    else:
        layout = domain.decompose(pos2, smoothing=Rmax)
        pos2 = layout.exchange(pos2)
    if comm.rank == 0:
        logger.info('exchange pos2')

    # initialize the trees to hold the field points
    tree1 = correlate.points(pos1, boxsize=datasources[0].BoxSize)
    tree2 = correlate.points(pos2, boxsize=datasources[0].BoxSize)

    # log the sizes of the trees
    logger.info('rank %d correlating %d x %d' %
                (comm.rank, len(tree1), len(tree2)))
    if comm.rank == 0:
        logger.info('all correlating %d x %d' % (N1, N2))

    # use multipole binning
    if len(poles):
        bins = correlate.FlatSkyMultipoleBinning(redges, poles, los,
                                                 compute_mean_coords=True)
    # use (R, mu) binning
    elif Nmu > 0:
        bins = correlate.FlatSkyBinning(redges, Nmu, los,
                                        compute_mean_coords=True)
    # use R binning
    else:
        bins = correlate.RBinning(redges, compute_mean_coords=True)

    # do the pair counting
    # have to set usefast = False to get mean centers, or exception thrown
    pc = correlate.paircount(tree2, tree1, bins, np=0, usefast=False)
    pc.sum1[:] = comm.allreduce(pc.sum1)

    # get the mean bin values, reducing from all ranks
    pc.pair_counts[:] = comm.allreduce(pc.pair_counts)
    with numpy.errstate(invalid='ignore'):
        if bins.Ndim > 1:
            for i in range(bins.Ndim):
                pc.mean_centers[i][:] = comm.allreduce(
                    pc.mean_centers_sum[i]) / pc.pair_counts
        else:
            pc.mean_centers[:] = comm.allreduce(
                pc.mean_centers_sum[0]) / pc.pair_counts

    # compute the random pairs from the fractional volume
    RR = 1. * N1 * N2 / datasources[0].BoxSize.prod()
    if Nmu > 0:
        dr3 = numpy.diff(pc.edges[0]**3)
        dmu = numpy.diff(pc.edges[1])
        RR *= 2. / 3. * numpy.pi * dr3[:, None] * dmu[None, :]
    else:
        RR *= 4. / 3. * numpy.pi * numpy.diff(pc.edges**3)

    # return the correlation and the pair count object
    xi = (1. * pc.sum1 / RR) - 1.0
    if len(poles):
        xi = xi.T  # makes ell the second axis
        xi[:, poles != 0] += 1.0  # only monopole gets the minus one

    return pc, xi, RR
def dobootstrap(binning, dir):
    """Count pairs chunk-by-chunk and save the inputs of a bootstrap estimate.

    The QSO, random and forest positions are each split with ``chop(4, ...)``.
    For every chunk index ``i`` the chunk is pair-counted against the *full*
    catalogue, and the resulting ``fullsum1`` / ``fullsum2`` arrays are stored
    in shared-memory buffers.  Three bootstrap samples are then built with
    ``MakeBootstrapSample`` from variables 0, 1 and 2 of ``fdelta`` (saved as
    ``red``, ``real`` and ``delta``), and everything is written to
    ``bootstrap.npz`` inside ``A.datadir``.

    Parameters
    ----------
    binning :
        binning object handed to ``correlate.paircount``; its ``centers``
        attribute is unpacked as ``(r, mu)`` and ``shape[0]``, ``shape[1]``
        size the per-chunk result arrays
    dir : str
        directory holding ``paramfile``; also passed to ``Config`` as its
        second argument

    Returns
    -------
    None
        the results are written to disk only

    Notes
    -----
    ``fdelta`` must carry at least three variables on its last axis because
    indices 0, 1 and 2 are read unconditionally.
    """
    def say(*items):
        # A single-argument print call produces the same space-separated
        # line under both the Python 2 print statement and the Python 3
        # print function.
        print(' '.join(str(item) for item in items))

    A = Config(dir + '/paramfile', dir)
    r, mu = binning.centers

    qpos = getqso(A)
    rpos = getrandom(A)
    fdelta, fpos, objectid = getforest(A,
                                       Zmin=2.0, Zmax=3.0,
                                       RfLamMin=1040, RfLamMax=1216,
                                       combine=4)

    # NOTE(review): the chunk objects are indexable, have a len() and expose
    # .start / .end; the exact partitioning done by chop() is not visible here.
    qchunks = chop(4, qpos)
    rchunks = chop(4, rpos)
    fchunks = chop(4, fpos)

    Nchunks = len(qchunks)
    Nvars = fdelta.shape[-1]
    say('Nchunks', Nchunks, 'Num of variables in fdelta', Nvars)

    # The chunk index is the FIRST axis of every per-chunk array below
    # (for the *sum1 arrays it follows the leading variable axis).
    chunkshape = [Nchunks, binning.shape[0], binning.shape[1]]

    DQDQ, RQDQ, RQRQ = sharedmem.empty([3] + chunkshape)
    DQDFsum1, RQDFsum1, DFDFsum1 = sharedmem.empty([3, Nvars] + chunkshape)
    DQDFsum2, RQDFsum2, DFDFsum2 = sharedmem.empty([3] + chunkshape)

    with sharedmem.MapReduce() as pool:
        # Full catalogues are built once and shared by every chunk job.
        Qfull = correlate.points(qpos, extra=numpy.arange(len(qpos)))
        say('Qfull:', Qfull.tree.min, Qfull.tree.max)
        Rfull = correlate.points(rpos, extra=numpy.arange(len(rpos)))
        say('Rfull:', Rfull.tree.min, Rfull.tree.max)
        Ffull = correlate.field(fpos, value=fdelta, extra=objectid)
        say('Ffull:', Ffull.tree.min, Ffull.tree.max)

        def work(i):
            # pool.critical serialises the progress output of the workers.
            with pool.critical:
                say('doing chunk', i, Nchunks)

            Qchunk = correlate.points(qpos[qchunks[i]],
                                      extra=Qfull.extra[qchunks[i]])
            Rchunk = correlate.points(rpos[rchunks[i]],
                                      extra=Rfull.extra[rchunks[i]])
            Fchunk = correlate.field(fpos[fchunks[i]],
                                     value=fdelta[fchunks[i]],
                                     extra=objectid[fchunks[i]])

            # Q-Q
            DQDQ[i, ...] = correlate.paircount(Qchunk, Qfull, binning, np=0).fullsum1
            RQDQ[i, ...] = correlate.paircount(Rchunk, Qfull, binning, np=0).fullsum1
            RQRQ[i, ...] = correlate.paircount(Rchunk, Rfull, binning, np=0).fullsum1

            # Q-F
            DQDF = correlate.paircount(Qchunk, Ffull, binning, np=0)
            DQDFsum1[:, i, ...] = DQDF.fullsum1
            DQDFsum2[i, ...] = DQDF.fullsum2

            RQDF = correlate.paircount(Rchunk, Ffull, binning, np=0)
            RQDFsum1[:, i, ...] = RQDF.fullsum1
            RQDFsum2[i, ...] = RQDF.fullsum2

            # F-F
            DFDF = correlate.paircount(Fchunk, Ffull, binning, np=0)
            DFDFsum1[:, i, ...] = DFDF.fullsum1
            DFDFsum2[i, ...] = DFDF.fullsum2

            with pool.critical:
                say('done chunk', i, Nchunks, len(fchunks[i]))

        pool.map(work, range(Nchunks))

    def sample_for(var):
        # Bootstrap sample built from variable `var` of the forest field; the
        # Q-Q terms and all sum2 terms are shared by every variable.
        return MakeBootstrapSample(r, mu,
                                   DQDQ, RQDQ, RQRQ,
                                   DQDFsum1[var], DQDFsum2,
                                   RQDFsum1[var], RQDFsum2,
                                   DFDFsum1[var], DFDFsum2,
                                   len(qpos), len(rpos))

    # Evaluated in the same order as before: variable 0, then 1, then 2.
    red = sample_for(0)
    real = sample_for(1)
    delta = sample_for(2)

    numpy.savez(os.path.join(A.datadir, 'bootstrap.npz'),
                r=r, mu=mu,
                DQDQ=DQDQ, RQDQ=RQDQ, RQRQ=RQRQ,
                DQDFsum1=DQDFsum1, DQDFsum2=DQDFsum2,
                RQDFsum1=RQDFsum1, RQDFsum2=RQDFsum2,
                DFDFsum1=DFDFsum1, DFDFsum2=DFDFsum2,
                Qchunksize=qchunks.end - qchunks.start,
                Rchunksize=rchunks.end - rchunks.start,
                Fchunksize=fchunks.end - fchunks.start,
                red=red, real=real, delta=delta)
def main():
    """Measure the intra-halo pair correlation and write it with its power spectrum.

    Steps, as performed below:

    1. Rank 0 opens the particle snapshot and the halo-label snapshot; both
       handles are broadcast to every rank.
    2. Halo ``i`` is assigned to rank ``i % comm.size``; particles carrying
       label 0 are spread round-robin over the ranks.
    3. Each rank reads a contiguous slice of positions and labels, the
       records are redistributed with ``mpsort.sort`` on the target rank and
       then ordered locally by label.
    4. For every non-zero label held locally, the particles of that label are
       pair-counted against themselves and the counts accumulated; the total
       is summed over ranks.
    5. The counts are normalised by ``RR`` and passed to ``corrfrompower``.
       Rank 0 writes ``(k, p)`` to ``ns.output`` (or to stdout when it is
       ``'-'``) and, for a real file, ``(r, sum1 / RR - 1)`` to
       ``ns.output + '.xi'``.

    Settings are read from the module-level ``ns`` object
    (``snapfilename``, ``halolabel``, ``halocatalogue``, ``boxsize``,
    ``Nmesh``, ``output``).  Returns ``None``.
    """
    comm = MPI.COMM_WORLD

    SNAP, LABEL = None, None
    if comm.rank == 0:
        SNAP = files.Snapshot(ns.snapfilename, files.TPMSnapshotFile)
        LABEL = files.Snapshot(ns.halolabel, files.HaloLabelFile)

    SNAP = comm.bcast(SNAP)
    LABEL = comm.bcast(LABEL)

    Ntot = sum(SNAP.npart)
    assert Ntot == sum(LABEL.npart)

    h = files.HaloFile(ns.halocatalogue)

    N = h.read_mass()

    # Particles not accounted for by entries 1.. of N.
    # NOTE(review): presumably these are the label-0 (non-halo) particles.
    N0 = Ntot - sum(N[1:])
    # halos are assigned to ranks 0, 1, 2, 3 ...
    halorank = numpy.arange(len(N)) % comm.size
    # but non halos are special we will fix it later.
    halorank[0] = -1

    NonhaloStart = comm.rank * int(N0) // comm.size
    NonhaloEnd = (comm.rank + 1) * int(N0) // comm.size

    myNtotal = numpy.sum(N[halorank == comm.rank], dtype='i8') + (NonhaloEnd - NonhaloStart)

    print("Rank %d NonhaloStart %d NonhaloEnd %d myNtotal %d" %
          (comm.rank, NonhaloStart, NonhaloEnd, myNtotal))

    data = numpy.empty(myNtotal, dtype=[
        ('Position', ('f4', 3)),
        ('Label', ('i4')),
        ('Rank', ('i4')),
    ])

    # Each rank reads the contiguous slice that follows the lower ranks' share.
    allNtotal = comm.allgather(myNtotal)
    start = sum(allNtotal[:comm.rank])
    end = sum(allNtotal[:comm.rank + 1])
    data['Position'] = SNAP.read("Position", start, end)
    data['Label'] = LABEL.read("Label", start, end)
    data['Rank'] = halorank[data['Label']]
    # now assign ranks to nonhalo particles
    nonhalomask = (data['Label'] == 0)
    nonhalocount = comm.allgather(nonhalomask.sum())

    data['Rank'][nonhalomask] = (sum(nonhalocount[:comm.rank]) +
                                 numpy.arange(nonhalomask.sum())) % comm.size

    mpsort.sort(data, orderby='Rank')

    # Local ordering by label so that each label occupies one contiguous run.
    arg = data['Label'].argsort()
    data = data[arg]

    ul = numpy.unique(data['Label'])

    bins = correlate.RBinning(40. / ns.boxsize, Nbins=ns.Nmesh)
    sum1 = numpy.zeros(len(bins.centers))

    for l in ul:
        if l == 0:
            continue
        start = data['Label'].searchsorted(l, side='left')
        end = data['Label'].searchsorted(l, side='right')
        pos = data['Position'][start:end]
        dataset = correlate.points(pos, boxsize=1.0)
        result = correlate.paircount(dataset, dataset, bins, np=0)
        sum1 += result.sum1
        if l % 1000 == 0:
            # progress indicator; parenthesised form works on Python 2 and 3
            print(l)

    sum1 = comm.allreduce(sum1, MPI.SUM)

    # Ntot is unchanged since it was computed from SNAP.npart above.
    RR = 4. / 3 * numpy.pi * numpy.diff(bins.edges**3) * (1.0 * Ntot * Ntot)

    k = numpy.arange(ns.Nmesh // 2) * 2 * numpy.pi / ns.boxsize
    # asymptotically zero at r. The mean doesn't matter as
    # we don't use zero k mode anyways.
    k, p = corrfrompower(bins.centers * ns.boxsize, sum1 / RR, R=k)

    # inverse FT factor
    p *= (2 * numpy.pi) ** 3

    if comm.rank == 0:
        # list(...) keeps the rows a real sequence where zip() is lazy.
        spectrum = list(zip(k, p))
        if ns.output != '-':
            # Same open order as before (main file first, then the .xi file),
            # but each handle is now closed even if a later step raises.
            with open(ns.output, 'w') as ff:
                with open(ns.output + '.xi', 'w') as ff2:
                    numpy.savetxt(ff2, list(zip(bins.centers, sum1 / RR - 1.0)))
                numpy.savetxt(ff, spectrum)
        else:
            # Write to the process stdout without closing it.
            numpy.savetxt(stdout, spectrum)
            stdout.flush()