Пример #1
0
    def ztree(self,
              zkey=None,
              scale=None,
              minthresh=10,
              maxthresh=20,
              np=None):
        """Build a ``zt.Tree`` over ``self['locations']``.

        ``scale`` defaults to ``fc.scale`` of the per-axis minimum and
        peak-to-peak range of the locations.  Keys are produced chunk by
        chunk with ``fc.encode`` inside a ``sharedmem.MapReduce`` pool and
        then ordered with ``sharedmem.argsort``.

        ``np`` is forwarded to both the pool and the argsort; ``minthresh``
        and ``maxthresh`` are forwarded to ``zt.Tree``.

        Note: the ``zkey`` argument is never read -- it is replaced by a
        freshly allocated key array.
        """
        if scale is None:
            lower = self['locations'].min(axis=0)
            extent = self['locations'].ptp(axis=0)
            scale = fc.scale(lower, extent)

        zkey = sharedmem.empty(self.numpoints, dtype=fc.fckeytype)
        step = 1024 * 1024

        def encode_chunk(start):
            # Each task fills its own, non-overlapping window of zkey.
            window = slice(start, start + step)
            X, Y, Z = self['locations'][window].T
            fc.encode(X, Y, Z, scale=scale, out=zkey[window])

        with sharedmem.MapReduce(np=np) as pool:
            pool.map(encode_chunk, range(0, len(zkey), step))

        # sharedmem.argsort is used here because it is faster.
        order = sharedmem.argsort(zkey, np=np)

        tree_args = dict(zkey=zkey,
                         scale=scale,
                         arg=order,
                         minthresh=minthresh,
                         maxthresh=maxthresh)
        return zt.Tree(**tree_args)
Пример #2
0
def chop(Nside, pos):
    """Group the entries of ``pos`` by the sky pixel they fall into.

    Every position is mapped to a pixel id with ``chealpy.vec2pix_nest``
    (in parallel, one chunk per task), the positions are argsorted by pixel
    id, and the result is packed with one group per pixel via
    ``sharedmem.array.packarray``.

    The original note says this "bootstraps the sky" into about 100 chunks
    of which only about 50 are big -- presumably for the Nside the caller
    uses; TODO confirm.
    """
    # we paint quasar uniformly as long as it is covered by sdss:
    Npix = chealpy.nside2npix(Nside)
    npoints = len(pos)
    chunkid = sharedmem.empty(npoints, dtype='intp')
    print(npoints)

    step = 1024 * 1024

    def assign_pixels(start):
        window = slice(start, start + step)
        chunkid[window] = chealpy.vec2pix_nest(Nside, pos[window])

    with sharedmem.MapReduce() as pool:
        pool.map(assign_pixels, range(0, npoints, step))

    order = sharedmem.argsort(chunkid)
    counts = sharedmem.array.bincount(chunkid, minlength=Npix)
    # Cross-check the parallel bincount against numpy's.
    assert (counts == numpy.bincount(chunkid, minlength=Npix)).all()
    return sharedmem.array.packarray(order, counts)
Пример #3
0
  def ztree(self, zkey=None, scale=None, minthresh=10, maxthresh=20, np=None):
    """Build a ``zt.Tree`` over ``self['locations']``.

    ``scale`` defaults to ``fc.scale`` of the per-axis minimum and
    peak-to-peak range of the locations.  Keys are produced chunk by chunk
    with ``fc.encode`` inside a ``sharedmem.MapReduce`` pool and then
    ordered with ``sharedmem.argsort``.

    ``np`` is forwarded to both the pool and the argsort; ``minthresh`` and
    ``maxthresh`` are forwarded to ``zt.Tree``.

    Note: the ``zkey`` argument is never read -- it is replaced by a
    freshly allocated key array.
    """
    if scale is None:
      lower = self['locations'].min(axis=0)
      extent = self['locations'].ptp(axis=0)
      scale = fc.scale(lower, extent)

    zkey = sharedmem.empty(self.numpoints, dtype=fc.fckeytype)
    step = 1024 * 1024

    def encode_chunk(start):
      # Each task fills its own, non-overlapping window of zkey.
      window = slice(start, start + step)
      X, Y, Z = self['locations'][window].T
      fc.encode(X, Y, Z, scale=scale, out=zkey[window])

    with sharedmem.MapReduce(np=np) as pool:
      pool.map(encode_chunk, range(0, len(zkey), step))

    # sharedmem.argsort is used here because it is faster.
    order = sharedmem.argsort(zkey, np=np)

    tree_args = dict(zkey=zkey, scale=scale, arg=order,
                     minthresh=minthresh, maxthresh=maxthresh)
    return zt.Tree(**tree_args)
Пример #4
0
  def fixmainid(self, merger):
    arg = sharedmem.argsort(self.data['mainid'])
    self.data[...] = self.data[arg]

    left = self.data['mainid'].searchsorted(merger['swallowed'], side='left')
    right = self.data['mainid'].searchsorted(merger['swallowed'], side='right')
    mask = self.data['mainid'][left] == merger['swallowed']
    print 'fixing mainid', mask.sum(), 'remaining'
    left = left[mask]
    right = right[mask]
    for i, row in enumerate(merger[mask]):
      time, after, swallowed = row
      l = left[i]
      r = right[i]
      self.data['mainid'][l:r] = after
      #assert (self.data['time'][l:r] == time).any()
      #assert (self[after]['time'] == time).any()

    return mask.sum()
Пример #5
0
  def fixmainid(self, merger):
    arg = sharedmem.argsort(self.data['mainid'])
    self.data[...] = self.data[arg]

    left = self.data['mainid'].searchsorted(merger['swallowed'], side='left')
    right = self.data['mainid'].searchsorted(merger['swallowed'], side='right')
    mask = self.data['mainid'][left] == merger['swallowed']
    print 'fixing mainid', mask.sum(), 'remaining'
    left = left[mask]
    right = right[mask]
    for i, row in enumerate(merger[mask]):
      time, after, swallowed = row
      l = left[i]
      r = right[i]
      self.data['mainid'][l:r] = after
      #assert (self.data['time'][l:r] == time).any()
      #assert (self[after]['time'] == time).any()

    return mask.sum()
Пример #6
0
  def fixparentmass(self, merger):
    arg = sharedmem.argsort(self.data['id'])
    self.data[...] = self.data[arg]
    left = self.data['id'].searchsorted(merger['swallowed'], side='left')
    right = self.data['id'].searchsorted(merger['swallowed'], side='right')
    mask = self.data['id'][left] == merger['swallowed']
    left = left[mask]
    right = right[mask]
    print 'fix mass', mask.sum()
    for i, row in enumerate(merger[mask]):
      time, after, swallowed = row
      l = left[i]
      r = right[i]
      last = numpy.abs(self.data['time'][l:r] - time).argsort()[:1]
      #assert (self.data['time'][l:r] == time).any()
      #assert (self[after]['time'] == time).any()
      aftermask = self['id'] == after
      match = numpy.abs(self.data[aftermask]['time'] - time).argsort()[:1]

      m = numpy.max([self.data['mass'][l:r][last].max(), self.data[aftermask]['mass'][match].max()])
      self.data['mass'][l:r][last] = m # numpy.nan
      self.data['mass'][aftermask.nonzero()[0][match]] = m #numpy.nan
Пример #7
0
  def fixparentmass(self, merger):
    arg = sharedmem.argsort(self.data['id'])
    self.data[...] = self.data[arg]
    left = self.data['id'].searchsorted(merger['swallowed'], side='left')
    right = self.data['id'].searchsorted(merger['swallowed'], side='right')
    mask = self.data['id'][left] == merger['swallowed']
    left = left[mask]
    right = right[mask]
    print 'fix mass', mask.sum()
    for i, row in enumerate(merger[mask]):
      time, after, swallowed = row
      l = left[i]
      r = right[i]
      last = numpy.abs(self.data['time'][l:r] - time).argsort()[:1]
      #assert (self.data['time'][l:r] == time).any()
      #assert (self[after]['time'] == time).any()
      aftermask = self['id'] == after
      match = numpy.abs(self.data[aftermask]['time'] - time).argsort()[:1]

      m = numpy.max([self.data['mass'][l:r][last].max(), self.data[aftermask]['mass'][match].max()])
      self.data['mass'][l:r][last] = m # numpy.nan
      self.data['mass'][aftermask.nonzero()[0][match]] = m #numpy.nan
Пример #8
0
# This check script is deliberately disabled: it aborts before doing any work.
# NotImplemented is a comparison sentinel, not an exception class; calling it
# fails with a TypeError instead of reporting the intended message, so the
# exception type NotImplementedError is raised here.
raise NotImplementedError("sorting has been deprecated")

import sharedmem

import numpy
# Fixed seed so the random input is reproducible.
numpy.random.seed(1)
a = numpy.random.random(10000000)
arg = sharedmem.argsort(a, chunksize=10240)

print(a[arg])
# The permutation must order the values non-decreasingly.
assert (a[arg[1:]] >= a[arg[:-1]]).all()

Пример #9
0
def paint(pos, color, luminosity, sml, camera, CCD, tree=None,
        return_tree_and_sml=False, normalize=True, np=None, direct_write=False,
        cumulative=True):
    """Paint particles onto ``CCD``, one sub-camera tile per pool task.

    Parameters
    ----------
    pos : (x, y, z)
        Sequence of three coordinate arrays.
    color : None, 1 or array
        Forwarded to ``cam.paint``.  Also selects the CCD layout (see CCD).
    luminosity : None, 1 or array
        Forwarded to ``cam.paint``.
    sml : None, 1 or array
        When None it is computed with ``setupsml`` from ``tree`` and ``pos``.
    camera : Camera
        Its ``shape`` is set to ``CCD.shape[:2]`` and it is divided into
        ``nCCD x nCCD`` tiles.
    CCD : array
        Output.  If ``color`` is None, ``CCD.shape == camera.shape`` and it
        receives the luminosity channel.  Otherwise ``CCD.shape`` is
        ``camera.shape + (2,)``; ``CCD[..., 0]`` is the color channel (sum of
        color * luminosity) and ``CCD[..., 1]`` is the luminosity channel
        (sum of luminosity).
    tree : zt.Tree, optional
        Built from ``pos`` when None.
    return_tree_and_sml : bool
        When True, return ``(CCD, tree, sml)`` instead of ``CCD``.
    normalize : bool
        When True and ``color`` is not None, ``CCD[..., 0]`` is divided by
        ``CCD[..., 1]`` at the end (weighted average of color).  When False,
        ``CCD[..., 0]`` stays the weighted sum of color.
    np : int, optional
        Passed to ``sharedmem.MapReduce``; defaults to
        ``sharedmem.cpu_count()``.
    direct_write : bool
        When True each task writes its tile straight into ``CCD`` (CCD must
        be on sharedmem); otherwise tiles are written by the pool's reduce
        callback.
    cumulative : bool
        When True tiles are added to the existing CCD content (``+=``);
        when False they are assigned (``=``).  Note that a sparse tile only
        touches the pixels it has non-zero luminosity for.
    """
    CCDlimit = 20 * 1024 * 1024 # 20M pixel per small CCD
    camera.shape = (CCD.shape[0], CCD.shape[1])
    nCCD = int((CCD.shape[0] * CCD.shape[1] / CCDlimit) ** 0.5)
    if np is None: np = sharedmem.cpu_count()
    # Make sure there are more tiles than processes.
    if nCCD <= np ** 0.5:
        nCCD = int(np ** 0.5 + 1)
    cams = camera.divide(nCCD, nCCD)
    # Each row of cams is unpacked below as (cam, ox, oy).
    cams = cams.reshape(-1, 3)

    if tree is None:
        scale = fc.scale([x.min() for x in pos], [x.ptp() for x in pos])
        zkey = sharedmem.empty(len(pos[0]), dtype=fc.fckeytype)

        with sharedmem.MapReduce(np=np) as pool:
            chunksize = 1024 * 1024
            def work(i):
                sl = slice(i, i+chunksize)
                x, y, z = pos
                fc.encode(x[sl], y[sl], z[sl], scale=scale, out=zkey[i:i+chunksize])
            pool.map(work, range(0, len(zkey), chunksize))

        arg = sharedmem.argsort(zkey)

        tree = zt.Tree(zkey=zkey, scale=scale, arg=arg, minthresh=8, maxthresh=20)
    if sml is None:
        # Size from pos rather than zkey: zkey only exists when the tree was
        # built above, so a caller-supplied tree used to hit a NameError here.
        npoints = len(pos[0])
        sml = sharedmem.empty(npoints, 'f4')
        with sharedmem.MapReduce(np=np) as pool:
            chunksize = 1024 * 64
            def work(i):
                setupsml(tree, [x[i:i+chunksize] for x in pos],
                        out=sml[i:i+chunksize])
            pool.map(work, range(0, npoints, chunksize))

    def writeCCD(i, sparse, myCCD):
        # Merge tile i into CCD at its (ox, oy) offset.
        cam, ox, oy = cams[i]
        if sparse:
            # Sparse tiles carry only the non-zero pixels: (index, C, L).
            index, C, L = myCCD
            x = index[0] + ox
            y = index[1] + oy
            p = CCD.flat
            if color is not None:
                ind = numpy.ravel_multi_index((x, y, 0), CCD.shape)
                if cumulative:
                    p[ind] += C
                else:
                    p[ind] = C
                ind = numpy.ravel_multi_index((x, y, 1), CCD.shape)
                if cumulative:
                    p[ind] += L
                else:
                    p[ind] = L
            else:
                ind = numpy.ravel_multi_index((x, y), CCD.shape)
                if cumulative:
                    p[ind] += L
                else:
                    p[ind] = L
        else:
            if color is not None:
                if cumulative:
                    CCD[ox:ox + cam.shape[0], oy:oy+cam.shape[1], :] += myCCD
                else:
                    CCD[ox:ox + cam.shape[0], oy:oy+cam.shape[1], :] = myCCD
            else:
                # Without color only the luminosity channel is kept.
                if cumulative:
                    CCD[ox:ox + cam.shape[0], oy:oy+cam.shape[1]] += myCCD[..., 1]
                else:
                    CCD[ox:ox + cam.shape[0], oy:oy+cam.shape[1]] = myCCD[..., 1]

    with sharedmem.MapReduce(np=np) as pool:
        def work(i):
            cam, ox, oy = cams[i]
            myCCD = numpy.zeros(cam.shape, dtype=('f8', 2))
            cam.paint(pos[0], pos[1], pos[2],
                    sml, color, luminosity, out=myCCD, tree=tree)
            mask = (myCCD[..., 1] != 0)
            # Ship tiles that are less than 10% lit in sparse form.
            if mask.sum() < 0.1 * myCCD[..., 1].size:
                index = mask.nonzero()
                C = myCCD[..., 0][mask]
                L = myCCD[..., 1][mask]
                sparse, myCCD = True, (index, C, L)
            else:
                sparse, myCCD = False, myCCD
            if not direct_write:
                return i, sparse, myCCD
            else:
                writeCCD(i, sparse, myCCD)
                # Placeholder result; reduce() ignores it in this mode.
                return 0, 0, 0
        def reduce(i, sparse, myCCD):
            if not direct_write:
                writeCCD(i, sparse, myCCD)
        pool.map(work, range(len(cams)), reduce=reduce)

    if color is not None and normalize:
        CCD[..., 0] /= CCD[..., 1]
    if return_tree_and_sml:
        return CCD, tree, sml
    else:
        # Drop the local references before returning (kept from the original).
        tree = None
        sml = None
        return CCD
Пример #10
0
# Start position of each file's slice in the combined arrays: a leading 0
# followed by the running sum of `size` (defined earlier, out of view).
offset = numpy.concatenate(([0], size.cumsum()))

print "allocation total pars", total
# Output buffers with `total` entries each; work() fills them slice by slice.
# presumably sharedmem.empty allocates memory visible to the pool workers --
# TODO confirm against the sharedmem documentation.
idall = sharedmem.empty(total, dtype=first.schema["id"].dtype)
sfrall = sharedmem.empty(total, dtype="f4")


def work(i):
    snap = Snapshot(argv[1] % i, "cmugadget", template=first)
    print "i", i
    sfr = snap[0, "sfr"]
    id = snap[0, "id"]
    print "i", i, "read"
    mask = sfr > 0
    idall[offset[i] : offset[i] + size[i]] = id[mask]
    sfrall[offset[i] : offset[i] + size[i]] = sfr[mask]
    print "i", i, "assign"
    print "i", i, mask.sum()


# Run work() once for every file index 0 .. Nfiles-1.
with sharedmem.Pool() as pool:
    pool.map(work, range(first.C["Nfiles"]))
print "phase 2 done"

# Reorder both arrays by ascending id so they stay aligned with each other.
arg = sharedmem.argsort(idall)
idall = idall[arg]
sfrall = sfrall[arg]

# Write the sorted arrays to the files 'sfgasid' and 'sfgassfr'.
idall.tofile("sfgasid")
sfrall.tofile("sfgassfr")
Пример #11
0
# `size` (built earlier, out of view) is converted to an 'i8' array;
# presumably it holds the number of selected entries per file -- TODO confirm.
size = numpy.array(size, 'i8')
total = size.sum()
# Start position of each file's slice in the combined arrays: a leading 0
# followed by the running sum of `size`.
offset = numpy.concatenate(([0], size.cumsum()))

print 'allocation total pars', total
# Output buffers with `total` entries each; work() fills them slice by slice.
# presumably sharedmem.empty allocates memory visible to the pool workers --
# TODO confirm against the sharedmem documentation.
idall = sharedmem.empty(total, dtype=first.schema['id'].dtype)
sfrall = sharedmem.empty(total, dtype='f4')

def work(i):
    snap = Snapshot(argv[1] % i, 'cmugadget', template=first)
    print 'i', i
    sfr = snap[0, 'sfr']
    id = snap[0, 'id']
    print 'i', i, 'read'
    mask = sfr > 0
    idall[offset[i]:offset[i]+size[i]] = id[mask]
    sfrall[offset[i]:offset[i]+size[i]] = sfr[mask]
    print 'i', i, 'assign'
    print 'i', i, mask.sum()
# Run work() once for every file index 0 .. Nfiles-1.
with sharedmem.Pool() as pool:
    pool.map(work, range(first.C['Nfiles']))
print 'phase 2 done'

# Reorder both arrays by ascending id so they stay aligned with each other.
arg = sharedmem.argsort(idall)
idall = idall[arg]
sfrall = sfrall[arg]

# Write the sorted arrays to the files 'sfgasid' and 'sfgassfr'.
idall.tofile('sfgasid')
sfrall.tofile('sfgassfr')