def ztree(self, zkey=None, scale=None, minthresh=10, maxthresh=20, np=None):
    """Build a ``zt.Tree`` over ``self['locations']``.

    Every location is encoded into a key with ``fc.encode``; the encoding is
    done in slabs of 1024 * 1024 points handed to a ``sharedmem.MapReduce``
    pool. The keys are then argsorted and wrapped, together with the scale,
    in a ``zt.Tree``.

    Parameters
    ----------
    zkey : ignored
        Whatever is passed in is discarded: the name is rebound to a freshly
        allocated key array before it is ever read.
    scale : optional
        Passed to ``fc.encode`` and ``zt.Tree``. When None it is derived with
        ``fc.scale`` from the per-axis minimum and peak-to-peak extent of
        ``self['locations']``.
    minthresh, maxthresh :
        Forwarded unchanged to ``zt.Tree``.
    np : optional
        Forwarded to ``sharedmem.MapReduce`` and ``sharedmem.argsort``
        (presumably the number of worker processes -- confirm against
        sharedmem).

    Returns
    -------
    The ``zt.Tree`` built from the keys, the scale and the sorting order.
    """
    if scale is None:
        lower = self['locations'].min(axis=0)
        extent = self['locations'].ptp(axis=0)
        scale = fc.scale(lower, extent)

    zkey = sharedmem.empty(self.numpoints, dtype=fc.fckeytype)
    step = 1024 * 1024

    with sharedmem.MapReduce(np=np) as pool:
        def encode_slab(start):
            # Each task fills its own, non-overlapping window of zkey.
            window = slice(start, start + step)
            x, y, z = self['locations'][window].T
            fc.encode(x, y, z, scale=scale, out=zkey[window])

        pool.map(encode_slab, range(0, len(zkey), step))

    # use sharedmem.argsort, because it is faster
    order = sharedmem.argsort(zkey, np=np)
    return zt.Tree(
        zkey=zkey,
        scale=scale,
        arg=order,
        minthresh=minthresh,
        maxthresh=maxthresh,
    )
def chop(Nside, pos):
    """Group the rows of ``pos`` by the pixel ``chealpy.vec2pix_nest`` gives them.

    The pixel index of every position is computed in slabs of 1024 * 1024
    rows on a ``sharedmem.MapReduce`` pool. The positions are then argsorted
    by pixel index and the ordering is split into one chunk per pixel (there
    are ``chealpy.nside2npix(Nside)`` pixels) with
    ``sharedmem.array.packarray``.

    Original author's note: used to bootstrap the sky; "returns about 100
    chunks, only 50 of them are big", with quasars painted uniformly wherever
    SDSS has coverage.

    Parameters
    ----------
    Nside :
        Resolution parameter handed to ``chealpy``.
    pos :
        Positions; sliced along the first axis and passed to
        ``chealpy.vec2pix_nest`` (presumably unit vectors -- confirm against
        the caller).

    Returns
    -------
    The result of ``sharedmem.array.packarray(order, counts)``, where
    ``order`` sorts the positions by pixel and ``counts`` holds the number of
    positions in each pixel.
    """
    npix = chealpy.nside2npix(Nside)
    pixel_of = sharedmem.empty(len(pos), dtype='intp')
    print(len(pos))

    step = 1024 * 1024
    with sharedmem.MapReduce() as pool:
        def assign_pixels(start):
            window = slice(start, start + step)
            pixel_of[window] = chealpy.vec2pix_nest(Nside, pos[window])

        pool.map(assign_pixels, range(0, len(pos), step))

    order = sharedmem.argsort(pixel_of)
    counts = sharedmem.array.bincount(pixel_of, minlength=npix)
    # Cross-check the parallel bincount against numpy's serial one.
    assert (counts == numpy.bincount(pixel_of, minlength=npix)).all()
    return sharedmem.array.packarray(order, counts)
def ztree(self, zkey=None, scale=None, minthresh=10, maxthresh=20, np=None):
    """Build a ``zt.Tree`` over ``self['locations']``.

    Every location is encoded into a key with ``fc.encode``; the encoding is
    done in slabs of 1024 * 1024 points handed to a ``sharedmem.MapReduce``
    pool. The keys are then argsorted and wrapped, together with the scale,
    in a ``zt.Tree``.

    Parameters
    ----------
    zkey : ignored
        Whatever is passed in is discarded: the name is rebound to a freshly
        allocated key array before it is ever read.
    scale : optional
        Passed to ``fc.encode`` and ``zt.Tree``. When None it is derived with
        ``fc.scale`` from the per-axis minimum and peak-to-peak extent of
        ``self['locations']``.
    minthresh, maxthresh :
        Forwarded unchanged to ``zt.Tree``.
    np : optional
        Forwarded to ``sharedmem.MapReduce`` and ``sharedmem.argsort``
        (presumably the number of worker processes -- confirm against
        sharedmem).

    Returns
    -------
    The ``zt.Tree`` built from the keys, the scale and the sorting order.
    """
    if scale is None:
        lower = self['locations'].min(axis=0)
        extent = self['locations'].ptp(axis=0)
        scale = fc.scale(lower, extent)

    zkey = sharedmem.empty(self.numpoints, dtype=fc.fckeytype)
    step = 1024 * 1024

    with sharedmem.MapReduce(np=np) as pool:
        def encode_slab(start):
            # Each task fills its own, non-overlapping window of zkey.
            window = slice(start, start + step)
            x, y, z = self['locations'][window].T
            fc.encode(x, y, z, scale=scale, out=zkey[window])

        pool.map(encode_slab, range(0, len(zkey), step))

    # use sharedmem.argsort, because it is faster
    order = sharedmem.argsort(zkey, np=np)
    return zt.Tree(
        zkey=zkey,
        scale=scale,
        arg=order,
        minthresh=minthresh,
        maxthresh=maxthresh,
    )
def fixmainid(self, merger):
    """Re-point records of swallowed objects at the object that absorbed them.

    ``self.data`` is first reordered in place by ascending ``'mainid'``.
    For every row of ``merger`` whose ``'swallowed'`` value occurs as a
    ``'mainid'`` in the data, all records carrying that ``'mainid'`` are
    rewritten to the row's second field (unpacked as ``after``).

    Parameters
    ----------
    merger :
        Structured array with a ``'swallowed'`` field whose rows unpack as
        ``(time, after, swallowed)``.

    Returns
    -------
    The number of merger rows that matched at least one record. The caller
    can presumably call this repeatedly until it returns 0 to resolve chains
    of mergers -- confirm against the caller.

    Notes
    -----
    After the rewrite ``self.data`` is in general no longer sorted by
    ``'mainid'``; the ranges used here are all computed before any record is
    modified.
    """
    arg = sharedmem.argsort(self.data['mainid'])
    self.data[...] = self.data[arg]

    left = self.data['mainid'].searchsorted(merger['swallowed'], side='left')
    right = self.data['mainid'].searchsorted(merger['swallowed'], side='right')
    # A value is present exactly when its left/right insertion points differ.
    # Unlike indexing the data with ``left``, this is safe when a swallowed
    # id sorts after every mainid (left == len(data)) or the data is empty.
    mask = right > left
    nmatched = mask.sum()
    # Single pre-formatted argument: prints the same text on Python 2 and 3.
    print('fixing mainid %s remaining' % (nmatched,))

    for l, r, row in zip(left[mask], right[mask], merger[mask]):
        time, after, swallowed = row
        self.data['mainid'][l:r] = after
    return nmatched
def fixparentmass(self, merger):
    """Equalise the masses of both partners of each merger at merger time.

    ``self.data`` is first reordered in place by ascending ``'id'``. For
    every row of ``merger`` whose ``'swallowed'`` value occurs as an ``'id'``
    in the data:

    * the record of the swallowed id whose ``'time'`` is closest to the
      merger time is located,
    * the record of the ``after`` id whose ``'time'`` is closest to the
      merger time is located,
    * the ``'mass'`` of both records is set to the larger of the two masses.

    Parameters
    ----------
    merger :
        Structured array with a ``'swallowed'`` field whose rows unpack as
        ``(time, after, swallowed)``.

    Raises
    ------
    ValueError
        From ``max()`` of an empty selection when a merger's ``after`` id
        has no record in the data (unchanged from the original behaviour).
    """
    arg = sharedmem.argsort(self.data['id'])
    self.data[...] = self.data[arg]

    left = self.data['id'].searchsorted(merger['swallowed'], side='left')
    right = self.data['id'].searchsorted(merger['swallowed'], side='right')
    # A value is present exactly when its left/right insertion points differ.
    # Unlike indexing the data with ``left``, this is safe when a swallowed
    # id sorts after every id (left == len(data)) or the data is empty.
    mask = right > left
    left = left[mask]
    right = right[mask]
    # Single pre-formatted argument: prints the same text on Python 2 and 3.
    print('fix mass %s' % (mask.sum(),))

    for l, r, row in zip(left, right, merger[mask]):
        time, after, swallowed = row

        # One-element index array: record of the swallowed id nearest in time.
        last = numpy.abs(self.data['time'][l:r] - time).argsort()[:1]

        # Records of the surviving id, and the one nearest in time among them.
        aftermask = self['id'] == after
        match = numpy.abs(self.data[aftermask]['time'] - time).argsort()[:1]

        m = numpy.max([
            self.data['mass'][l:r][last].max(),
            self.data[aftermask]['mass'][match].max(),
        ])
        # Both left-hand sides index views of self.data, so the writes land
        # in the underlying array.
        self.data['mass'][l:r][last] = m
        self.data['mass'][aftermask.nonzero()[0][match]] = m
# This script is deliberately disabled: it aborts before doing anything.
# NotImplementedError is the exception class; the NotImplemented constant
# used previously is not callable and produced a confusing TypeError instead.
raise NotImplementedError("sorting has been deprecated")

# Former smoke test for sharedmem.argsort, kept for reference: argsort ten
# million reproducible random numbers in chunks and check the resulting
# order is non-decreasing.
import sharedmem
import numpy

numpy.random.seed(1)
a = numpy.random.random(10000000)
arg = sharedmem.argsort(a, chunksize=10240)
print(a[arg])
assert (a[arg[1:]] >= a[arg[:-1]]).all()
def paint(pos, color, luminosity, sml, camera, CCD, tree=None,
          return_tree_and_sml=False, normalize=True, np=None,
          direct_write=False, cumulative=True):
    """Paint particles onto ``CCD`` through ``camera``, tile by tile.

    The camera is divided into ``nCCD * nCCD`` sub-cameras; each one is
    painted into a private buffer by a ``sharedmem.MapReduce`` worker and the
    buffer is then written into the matching region of ``CCD``.

    Parameters
    ----------
    pos : (x, y, z)
        Particle coordinates, one array per axis.
    color : None, 1 or array
        When None only the luminosity channel is produced.
    luminosity : None, 1 or array
    sml : None or array
        Smoothing lengths. When None they are computed here with
        ``setupsml`` from the tree.
    camera : Camera
        Its ``shape`` attribute is overwritten with the first two dimensions
        of ``CCD``.
    CCD : array
        If ``color`` is None, ``CCD.shape == camera.shape`` and it receives
        the sum of luminosity. Otherwise ``CCD.shape == camera.shape + (2,)``
        with ``CCD[..., 0]`` the color channel (sum of color * luminosity)
        and ``CCD[..., 1]`` the luminosity / brightness channel.
    tree : optional
        Tree handed to ``cam.paint`` and ``setupsml``; built here from
        ``pos`` when None.
    return_tree_and_sml : bool
        When true, return ``(CCD, tree, sml)`` instead of ``CCD``.
    normalize : bool
        If true (and ``color`` is not None) ``CCD[..., 0]`` is divided by
        ``CCD[..., 1]``, giving the weighted average of color; if false it
        stays the weighted sum.
    np : optional
        Passed to ``sharedmem.MapReduce``; defaults to
        ``sharedmem.cpu_count()``.
    direct_write : bool
        If true each worker writes straight into ``CCD`` (``CCD`` must then
        live in shared memory); otherwise tiles are written in the reduce
        step.
    cumulative : bool
        If true the painted values are added to the existing content of
        ``CCD`` (``+=``); if false they are assigned.

    Returns
    -------
    ``CCD``, or ``(CCD, tree, sml)`` when ``return_tree_and_sml`` is true.
    """
    CCDlimit = 20 * 1024 * 1024  # 20M pixel per small CCD
    camera.shape = (CCD.shape[0], CCD.shape[1])
    nCCD = int((CCD.shape[0] * CCD.shape[1] / CCDlimit) ** 0.5)
    if np is None:
        np = sharedmem.cpu_count()
    # Guarantee nCCD * nCCD exceeds np.
    if nCCD <= np ** 0.5:
        nCCD = int(np ** 0.5 + 1)
    cams = camera.divide(nCCD, nCCD)
    cams = cams.reshape(-1, 3)

    npoints = len(pos[0])

    if tree is None:
        scale = fc.scale([x.min() for x in pos], [x.ptp() for x in pos])
        zkey = sharedmem.empty(npoints, dtype=fc.fckeytype)
        with sharedmem.MapReduce(np=np) as pool:
            chunksize = 1024 * 1024

            def work(i):
                sl = slice(i, i + chunksize)
                x, y, z = pos
                fc.encode(x[sl], y[sl], z[sl], scale=scale, out=zkey[sl])

            pool.map(work, range(0, npoints, chunksize))
        arg = sharedmem.argsort(zkey)
        tree = zt.Tree(zkey=zkey, scale=scale, arg=arg,
                       minthresh=8, maxthresh=20)

    if sml is None:
        # Sized from pos rather than zkey: zkey only exists when the tree
        # was built above, so a caller-supplied tree with sml=None used to
        # raise NameError here.
        sml = sharedmem.empty(npoints, 'f4')
        with sharedmem.MapReduce(np=np) as pool:
            chunksize = 1024 * 64

            def work(i):
                setupsml(tree, [x[i:i + chunksize] for x in pos],
                         out=sml[i:i + chunksize])

            pool.map(work, range(0, npoints, chunksize))

    def writeCCD(i, sparse, myCCD):
        """Write tile i (sparse triple or dense buffer) into CCD."""
        cam, ox, oy = cams[i]
        if sparse:
            # NOTE(review): with cumulative=False only the pixels listed in
            # `index` are assigned; the rest of this tile keeps its previous
            # content, unlike the dense branch below. Confirm this is
            # intended.
            index, C, L = myCCD
            x = index[0] + ox
            y = index[1] + oy
            p = CCD.flat
            if color is not None:
                ind = numpy.ravel_multi_index((x, y, 0), CCD.shape)
                if cumulative:
                    p[ind] += C
                else:
                    p[ind] = C
                ind = numpy.ravel_multi_index((x, y, 1), CCD.shape)
                if cumulative:
                    p[ind] += L
                else:
                    p[ind] = L
            else:
                ind = numpy.ravel_multi_index((x, y), CCD.shape)
                if cumulative:
                    p[ind] += L
                else:
                    p[ind] = L
        else:
            if color is not None:
                if cumulative:
                    CCD[ox:ox + cam.shape[0], oy:oy + cam.shape[1], :] += myCCD
                else:
                    CCD[ox:ox + cam.shape[0], oy:oy + cam.shape[1], :] = myCCD
            else:
                if cumulative:
                    CCD[ox:ox + cam.shape[0], oy:oy + cam.shape[1]] += myCCD[..., 1]
                else:
                    CCD[ox:ox + cam.shape[0], oy:oy + cam.shape[1]] = myCCD[..., 1]

    with sharedmem.MapReduce(np=np) as pool:
        def work(i):
            cam, ox, oy = cams[i]
            myCCD = numpy.zeros(cam.shape, dtype=('f8', 2))
            cam.paint(pos[0], pos[1], pos[2], sml, color, luminosity,
                      out=myCCD, tree=tree)
            mask = (myCCD[..., 1] != 0)
            # Ship only the lit pixels when fewer than 10% of the tile has
            # non-zero luminosity.
            if mask.sum() < 0.1 * myCCD[..., 1].size:
                index = mask.nonzero()
                C = myCCD[..., 0][mask]
                L = myCCD[..., 1][mask]
                sparse, myCCD = True, (index, C, L)
            else:
                sparse = False
            if not direct_write:
                return i, sparse, myCCD
            writeCCD(i, sparse, myCCD)
            # Placeholder triple; reduce() ignores it in direct_write mode.
            return 0, 0, 0

        def reduce(i, sparse, myCCD):
            if not direct_write:
                writeCCD(i, sparse, myCCD)

        pool.map(work, range(len(cams)), reduce=reduce)

    if color is not None and normalize:
        CCD[..., 0] /= CCD[..., 1]
    if return_tree_and_sml:
        return CCD, tree, sml
    return CCD
# Phase 2: gather the ids and star-formation rates of all particles with
# sfr > 0 from every snapshot file into two shared arrays, sort them by id
# and dump them to disk. `size` and `total` are expected to hold the
# per-file counts and their sum.

# File i owns the slice [offset[i], offset[i] + size[i]) of the outputs.
offset = numpy.concatenate(([0], size.cumsum()))
# Prints below pass one pre-formatted string so the output is identical on
# Python 2 and Python 3.
print("allocation total pars %s" % (total,))
idall = sharedmem.empty(total, dtype=first.schema["id"].dtype)
sfrall = sharedmem.empty(total, dtype="f4")


def work(i):
    """Copy the sfr > 0 entries of snapshot file i into its output slice."""
    snap = Snapshot(argv[1] % i, "cmugadget", template=first)
    print("i %s" % (i,))
    sfr = snap[0, "sfr"]
    ids = snap[0, "id"]  # not named `id`, to avoid shadowing the builtin
    print("i %s read" % (i,))
    mask = sfr > 0
    # size[i] must equal mask.sum(), otherwise the assignments below fail
    # with a shape mismatch.
    start = offset[i]
    stop = start + size[i]
    idall[start:stop] = ids[mask]
    sfrall[start:stop] = sfr[mask]
    print("i %s assign" % (i,))
    print("i %s %s" % (i, mask.sum()))


with sharedmem.Pool() as pool:
    pool.map(work, range(first.C["Nfiles"]))
print("phase 2 done")

# Reorder both arrays by ascending id before writing them out.
arg = sharedmem.argsort(idall)
idall = idall[arg]
sfrall = sfrall[arg]
idall.tofile("sfgasid")
sfrall.tofile("sfgassfr")
# Phase 2: gather the ids and star-formation rates of all particles with
# sfr > 0 from every snapshot file into two shared arrays, sort them by id
# and dump them to disk.

# Per-file counts as 64-bit integers, and their sum.
size = numpy.array(size, 'i8')
total = size.sum()
# File i owns the slice [offset[i], offset[i] + size[i]) of the outputs.
offset = numpy.concatenate(([0], size.cumsum()))
# Prints below pass one pre-formatted string so the output is identical on
# Python 2 and Python 3.
print('allocation total pars %s' % (total,))
idall = sharedmem.empty(total, dtype=first.schema['id'].dtype)
sfrall = sharedmem.empty(total, dtype='f4')


def work(i):
    """Copy the sfr > 0 entries of snapshot file i into its output slice."""
    snap = Snapshot(argv[1] % i, 'cmugadget', template=first)
    print('i %s' % (i,))
    sfr = snap[0, 'sfr']
    ids = snap[0, 'id']  # not named `id`, to avoid shadowing the builtin
    print('i %s read' % (i,))
    mask = sfr > 0
    # size[i] must equal mask.sum(), otherwise the assignments below fail
    # with a shape mismatch.
    start = offset[i]
    stop = start + size[i]
    idall[start:stop] = ids[mask]
    sfrall[start:stop] = sfr[mask]
    print('i %s assign' % (i,))
    print('i %s %s' % (i, mask.sum()))


with sharedmem.Pool() as pool:
    pool.map(work, range(first.C['Nfiles']))
print('phase 2 done')

# Reorder both arrays by ascending id before writing them out.
arg = sharedmem.argsort(idall)
idall = idall[arg]
sfrall = sfrall[arg]
idall.tofile('sfgasid')
sfrall.tofile('sfgassfr')