Example #1
def test_empty_sort(comm, tuning):
    s = numpy.empty(0, dtype=[
        ('vkey', ('u8', 3)),
        ('vector', ('u4', 3)),
    ])

    mpsort.sort(s, 'vkey', out=s, comm=comm, tuning=tuning)
Example #2
def main():
    comm = MPI.COMM_WORLD
 
    if comm.rank == 0:
        snapfile = files.Snapshot(ns.snapfilename, files.TPMSnapshotFile)
        labelfile = files.Snapshot(ns.halolabel, files.HaloLabelFile)
        npart = snapfile.npart
        output = files.Snapshot.create(ns.output, files.TPMSnapshotFile, npart)
        comm.bcast((snapfile, labelfile, output))
    else:
        snapfile, labelfile, output = comm.bcast(None) 
    comm.barrier()

    Ntot = sum(snapfile.npart)
    mystart = Ntot * comm.rank // comm.size
    myend = Ntot * (comm.rank + 1) // comm.size

    for field in ['Position', 'Velocity', 'ID']:
        content = snapfile.read(field, mystart, myend)
        if len(content.shape) == 1:
            dtype = numpy.dtype(content.dtype)
        else:
            dtype = numpy.dtype((content.dtype, content.shape[1:]))
        data = numpy.empty(myend - mystart, dtype=[
            ('Label', 'u8'),
            ('content', dtype),
                ])
        data['content'] = content
        content = None
        data['Label'] = labelfile.read('Label', mystart, myend)
        nonhalo = data['Label'] == 0
        data['Label'][nonhalo] = numpy.iinfo('u8').max
        mpsort.sort(data, orderby='Label')
        
        output.write(field, mystart, data['content'])
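
The remapping of label 0 to the largest u8 value above makes non-halo particles sort to the end of the global order. A minimal single-process check of that trick:

import numpy

label = numpy.array([3, 0, 1, 0, 2], dtype='u8')
label[label == 0] = numpy.iinfo('u8').max   # unlabeled objects sort last
print(numpy.sort(label))                    # [1 2 3 max max]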
Example #3
    def concat(kls, *args, **kwargs):
        """
        Append several distributed arrays into one.

        Parameters
        ----------
        localsize : int, optional
            the local length of the concatenated result on this rank;
            defaults to the combined local length of the inputs.

        """

        localsize = kwargs.pop('localsize', None)

        comm = args[0].comm

        localsize_in = sum([len(arg.local) for arg in args])

        if localsize is None:
            localsize = sum([len(arg.local) for arg in args])

        eldtype = numpy.result_type(*[arg.local for arg in args])

        dtype = [('index', 'intp'), ('el', eldtype)]

        inp = numpy.empty(localsize_in, dtype=dtype)
        out = numpy.empty(localsize, dtype=dtype)

        go = 0
        o = 0
        for arg in args:
            inp['index'][o:o + len(arg.local)] = go + arg.coffset + numpy.arange(len(arg.local), dtype='intp')
            inp['el'][o:o + len(arg.local)]    = arg.local
            o = o + len(arg.local)
            go = go + arg.cshape[0]
        mpsort.sort(inp, orderby='index', out=out, comm=comm)
        return DistributedArray(out['el'].copy(), comm=comm)
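
The heart of `concat` is tagging every element with its global index in the concatenated result and letting `mpsort.sort` route elements to the owning ranks. A self-contained sketch of the same trick without the `DistributedArray` wrapper (the offset bookkeeping via `comm.scan` is an assumption about how the pieces are laid out):

from mpi4py import MPI
import numpy
import mpsort

comm = MPI.COMM_WORLD

# each rank holds one slab of two global arrays A and B
a = numpy.arange(comm.rank * 3, comm.rank * 3 + 3, dtype='i8')
b = numpy.arange(100 + comm.rank * 2, 100 + comm.rank * 2 + 2, dtype='i8')

oa = comm.scan(len(a)) - len(a)     # global offset of the local piece of A
ob = comm.scan(len(b)) - len(b)     # global offset of the local piece of B
na = comm.allreduce(len(a))         # global length of A

inp = numpy.empty(len(a) + len(b), dtype=[('index', 'intp'), ('el', 'i8')])
inp['index'][:len(a)] = oa + numpy.arange(len(a))       # A keeps its position
inp['index'][len(a):] = na + ob + numpy.arange(len(b))  # B lands after all of A
inp['el'][:len(a)] = a
inp['el'][len(a):] = b

out = numpy.empty_like(inp)
mpsort.sort(inp, orderby='index', out=out, comm=comm)   # globally ordered concat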
Example #4
    def sort(self, orderby=None):
        """
        Sort array globally by key orderby.

        Due to a limitation of mpsort, self[orderby] must be u8.

        """
        mpsort.sort(self.local, orderby, comm=self.comm)
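
When the key column is signed, one way to satisfy the u8 requirement mentioned in the docstring is an order-preserving shift into the unsigned range. A sketch; the bias trick is standard numpy, not part of mpsort:

import numpy

key = numpy.array([-3, 5, -1], dtype='i8')
u8key = key.view('u8') + numpy.uint64(2 ** 63)  # monotone map from i8 to u8
assert (numpy.argsort(u8key) == numpy.argsort(key)).all()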
Example #5
def test_sort_u4(comm):
    s = numpy.uint32(numpy.random.random(size=1000) * 1000 - 400)

    local = split(s, comm)
    s = heal(local, comm)

    mpsort.sort(local, orderby=None, out=None, comm=comm)

    r = heal(local, comm)
    s.sort()
    assert_array_equal(s, r)
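
`split` and `heal` are helpers from mpsort's test suite and are not shown here. A plausible reconstruction, assuming `split` hands each rank a contiguous slice (optionally of an explicit per-rank size) and `heal` reassembles the global array on every rank:

import numpy

def split(array, comm, localsize=None):
    # contiguous slice for this rank; explicit sizes must sum to len(array)
    if localsize is None:
        start = len(array) * comm.rank // comm.size
        end = len(array) * (comm.rank + 1) // comm.size
        return array[start:end].copy()
    start = comm.scan(localsize) - localsize  # exclusive prefix sum of sizes
    return array[start:start + localsize].copy()

def heal(local, comm):
    # gather every rank's chunk and rebuild the global array everywhere
    return numpy.concatenate(comm.allgather(local))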
Example #6
def test_sort_tunings(comm, tuning):
    s = numpy.int32(numpy.random.random(size=1000) * 1000)

    local = split(s, comm)
    s = heal(local, comm)

    g = comm.allgather(local.size)
    mpsort.sort(local, orderby=None, out=None, comm=comm, tuning=tuning)

    r = heal(local, comm)
    s.sort()
    assert_array_equal(s, r)
Example #7
def test_sort_inplace(comm):
    s = numpy.int32(numpy.random.random(size=1000) * 1000)

    local = split(s, comm)
    s = heal(local, comm)

    g = comm.allgather(local.size)
    mpsort.sort(local, local, out=None, comm=comm)

    r = heal(local, comm)
    s.sort()
    assert_array_equal(s, r)
Example #8
def test_sort_tunings(comm):
    for tuning in TUNINGS:
        s = numpy.int32(numpy.random.random(size=1000) * 1000)

        local = split(s, comm)
        s = heal(local, comm)

        g = comm.allgather(local.size)
        mpsort.sort(local, orderby=None, out=None, comm=comm, tuning=tuning)

        r = heal(local, comm)
        s.sort()
        assert_array_equal(s, r)
Example #9
def test_sort_mismatched_zeros(comm):
    s = numpy.int32(numpy.random.random(size=1000) * 1000)

    local = split(s, comm, [0, 400, 0, 600][comm.rank])
    s = heal(local, comm)

    res = split(s, comm, [200, 200, 0, 600][comm.rank])
    res[:] = numpy.int32(numpy.random.random(size=res.size) * 1000)
    mpsort.sort(local, local, out=res, comm=comm, tuning=['REQUIRE_GATHER_SORT'])

    s.sort()

    r = heal(res, comm)
    assert_array_equal(s, r)
Example #10
def test_sort_flatiter(comm):
    s = numpy.int32(numpy.random.random(size=1000) * 1000)

    local = split(s, comm)
    s = heal(local, comm)

    res = numpy.zeros(adjustsize(local.size, comm), dtype=local.dtype)

    mpsort.sort(local.flat, local.flat, out=res.flat, comm=comm)

    s.sort()

    r = heal(res, comm)
    assert_array_equal(s, r)
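
`adjustsize` is another unshown test helper; it apparently returns a different local size with the same global total, so that the output partition need not match the input partition. A plausible sketch (a guess at the helper's intent):

def adjustsize(localsize, comm):
    # repartition the same global total with a deliberately lopsided,
    # rank-dependent split
    total = comm.allreduce(localsize)
    edges = [total * r * r // (comm.size * comm.size)
             for r in range(comm.size + 1)]
    return edges[comm.rank + 1] - edges[comm.rank]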
Example #11
File: pm.py Project: nickhand/pypm
    def sort(self, out=None):
        """ Sort the field to 'C'-order, partitioned by MPI ranks. Save the
            result to flatiter.

            Parameters
            ----------
            out : numpy.flatiter
                A flatiter to store the 'C' order. If not a flatiter, the .flat
                attribute is used.

            Returns
            -------
            numpy.flatiter : the flatiter provided or created.

            Notes
            -----
            Set `out` to self.value for an 'inplace' sort.
        """
        ind = numpy.ravel_multi_index(numpy.mgrid[self.slices], self.global_shape)

        if out is None:
            out = numpy.empty_like(self.value)

        if not isinstance(out, numpy.flatiter):
            out = out.flat

        assert isinstance(out, numpy.flatiter)
        assert len(out) == self.size

        return mpsort.sort(self.flat, orderby=ind.flat, comm=self.pm.comm, out=out)
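
The `orderby` key here is the C-order linear index of every local mesh point, so the global sort lays the field out in 'C' order across ranks. A small single-process illustration of what `ravel_multi_index` produces for a slab (the slab bounds are made up):

import numpy

slices = (slice(2, 4), slice(0, 3))   # this rank owns rows 2-3 of a 4x3 mesh
global_shape = (4, 3)
ind = numpy.ravel_multi_index(numpy.mgrid[slices], global_shape)
print(ind)  # [[ 6  7  8]
            #  [ 9 10 11]]  -> linear positions in the global C-order array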
Example #12
def test_few_items(comm4, sizes, tuning):
    comm = comm4
    A = [range(sizes[i]) for i in range(len(sizes))]
    s = numpy.empty(len(A[comm.rank]), dtype=[
        ('vkey', ('u8', 3)),
        ('vector', ('u4', 3)),
    ])

    s['vkey'] = numpy.array(A[comm.rank], dtype='u8')[:, None]
    s['vector'] = 1
    S = numpy.concatenate(comm.allgather(s))
    S.sort()
    r = numpy.empty(len(A[comm.rank]), dtype=s.dtype)
    mpsort.sort(s, 'vkey', out=r, comm=comm, tuning=tuning)
    R = numpy.concatenate(comm.allgather(r))
    comm.barrier()
    assert_array_equal(R['vkey'], S['vkey'])
    comm.barrier()
Example #13
def test_sort_struct(comm):
    s = numpy.empty(1000, dtype=[
        ('value', 'i8'),
        ('key', 'i8')])

    s['value'] = numpy.int32(numpy.random.random(size=1000) * 1000)
    s['key'] = s['value']

    local = split(s, comm)
    s = heal(local, comm)

    res = numpy.zeros_like(local)

    mpsort.sort(local, 'key', out=res, comm=comm)

    r = heal(res, comm)

    s.sort(order='key')
    assert_array_equal(s['value'], r['value'])
Example #14
def test_sort_vector(comm):
    s = numpy.empty(10, dtype=[('value', 'i8')])

    s['value'] = numpy.int32(numpy.random.random(size=len(s)) * 1000)

    local = split(s, comm)
    s = heal(local, comm)

    k = numpy.empty(len(local), ('i8', 2))
    k[:, 0][...] = local['value']
    k[:, 1][...] = local['value']

    res = numpy.zeros_like(local)

    mpsort.sort(local, k, out=res, comm=comm)

    s.sort(order='value')

    r = heal(res, comm)

    assert_array_equal(s['value'], r['value'])
Example #15
def test_sort_struct(comm):
    s = numpy.empty(10, dtype=[
        ('value', 'i8'),
        ('key', 'i8')])

    numpy.random.seed(1234)

    s['value'] = numpy.int32(numpy.random.random(size=10) * 1000-400)
    s['key'] = s['value']

    backup = s.copy()
    local = split(s, comm)
    s = heal(local, comm)

    res = numpy.zeros_like(local)

    mpsort.sort(local, 'key', out=res, comm=comm)

    r = heal(res, comm)

    backup.sort(order='key')
    assert_array_equal(backup['value'], r['value'])
Example #16
    def run(self):
        """
        Run the TraceHalo Algorithm
        """
        import mpsort
        from nbodykit import halos
        
        comm = self.comm

        with self.source.open() as source:
            [[ID]] = source.read(['ID'], full=True)

        Ntot = self.comm.allreduce(len(ID))

        with self.sourcelabel.open() as sourcelabel:
            [[label]] = sourcelabel.read(['Label'], full=True)

        mpsort.sort(label, orderby=ID, comm=self.comm)
        del ID

        data = numpy.empty(len(label), dtype=[
                    ('ID', ('i8')), 
                    ('Position', ('f4', 3)), 
                    ('Velocity', ('f4', 3)), 
                    ])
        with self.dest.open() as dest:
            [[data['Position'][...]]] = dest.read(['Position'], full=True)
            [[data['Velocity'][...]]] = dest.read(['Velocity'], full=True)
            [[data['ID'][...]]] = dest.read(['ID'], full=True)
        mpsort.sort(data, orderby='ID', comm=self.comm)

        data['Position'] /= self.dest.BoxSize
        data['Velocity'] /= self.dest.BoxSize
        
        N = halos.count(label)
        hpos = halos.centerofmass(label, data['Position'], boxsize=1.0)
        hvel = halos.centerofmass(label, data['Velocity'], boxsize=None)
        return hpos, hvel, N, Ntot
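
The two `mpsort.sort(..., orderby='ID')` calls above realign `label` and `data`, which may arrive in different orders, onto one global ordering keyed by particle ID. A minimal sketch of the alignment pattern, assuming the in-place array form used above (the strided ID construction is only for the demo):

from mpi4py import MPI
import numpy
import mpsort

comm = MPI.COMM_WORLD
n = 4
# globally unique IDs, interleaved across ranks
ID = comm.rank + comm.size * numpy.arange(n, dtype='i8')
value = ID * 10                            # payload that must travel with its ID

mpsort.sort(value, orderby=ID, comm=comm)  # value is now in global ID order
assert (value == (comm.rank * n + numpy.arange(n)) * 10).all()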
Example #17
def test_issue7(comm12, tuning):
    # This roughly mimics the behavior of issue7: a data size of 40 bytes
    # and a radix size of 16 bytes, with 8 bytes from offset 0 and 4 bytes from offset 16.
    comm = comm12
    import base64
    import pickle
    A = [numpy.array(a, dtype='u4').reshape(-1, 10) for a in pickle.loads(base64.decodebytes(Issue7B64))]

    s = numpy.zeros(len(A[comm.rank]), dtype=[('radix', ('u8', 2)), ('ext', ('u8', 3))])

    s['radix'][:, 0] = (A[comm.rank][:, 5]) + (A[comm.rank][:, 4] << 32)
    s['radix'][:, 1] = A[comm.rank][:, 0]

    S = numpy.concatenate(comm.allgather(s))
    ind = numpy.lexsort(S['radix'].T)
    S = S[ind]
    r = numpy.empty(len(A[comm.rank]), dtype=s.dtype)
    mpsort.sort(s, orderby='radix', out=r, comm=comm, tuning=tuning)
    R = numpy.concatenate(comm.allgather(r))

    comm.barrier()
    assert_array_equal(R.flatten(), S.flatten())
    comm.barrier()
Example #18
def test_sort_struct_vector(comm):
    s = numpy.empty(10, dtype=[
        ('value', 'i8'),
        ('key', 'i8'),
        ('vkey', ('i8', 2))])

    s['value'] = numpy.int32(numpy.random.random(size=len(s)) * 1000)

    # validate against numpy using the scalar key, because numpy compares
    # vector fields as raw byte streams when sorting.
    s['key'][:][...] = s['value']
    s['vkey'][:, 0][...] = s['value']
    s['vkey'][:, 1][...] = s['value']

    local = split(s, comm)
    s = heal(local, comm)

    res = numpy.zeros_like(local)

    mpsort.sort(local, 'vkey', out=res, comm=comm)

    r = heal(res, comm)
    s.sort(order='key')
    assert_array_equal(s['value'], r['value'])
Example #19
    def ravel(self, out=None):
        """ Ravel the field to 'C'-order, partitioned by MPI ranks. Save the
            result to flatiter.

            Parameters
            ----------
            out : numpy.flatiter, or Ellipsis for inplace
                A flatiter to store the 'C' order. If not a flatiter, the .flat
                attribute is used.

            Returns
            -------
            numpy.flatiter : the flatiter provided or created.

            Notes
            -----
            Set `out` to Ellipsis or to self.value for an 'inplace' ravel.
        """
        if out is None:
            out = numpy.empty_like(self.value)

        if is_inplace(out):
            out = self.value

        if not isinstance(out, numpy.flatiter):
            out = out.flat

        assert isinstance(out, numpy.flatiter)
        assert len(out) == self.size
        if self.pm.comm.size > 1:
            ind = numpy.ravel_multi_index(numpy.mgrid[self.slices], self.cshape)
            return mpsort.sort(self.flat, orderby=ind.flat, comm=self.pm.comm, out=out)
        else:
            # optimize for a single rank -- directly copy the result
            out[...] = self.flat
            return out
Example #20
def _sort_data(comm, cat, rankby, reverse=False, usecols=None):
    """
    Sort the input data by the specified columns

    Parameters
    ----------
    comm :
        the mpi communicator
    cat : CatalogSource
        the catalog holding the data to sort
    rankby : list of str
        list of columns to sort by
    reverse : bool, optional
        if ``True``, sort in descending order
    usecols : list, optional
        only sort these data columns
    """
    import mpsort

    # determine which columns we need
    if usecols is None:
        usecols = cat.columns

    # remove duplicates from usecols
    usecols = list(set(usecols))

    # the columns we need in the sort steps
    columns = list(set(rankby) | set(usecols))

    # make the data to sort
    dtype = [('_sortkey', 'i8')]
    for col in cat:
        if col in columns:
            dt = (cat[col].dtype.char, )
            dt += cat[col].shape[1:]
            if len(dt) == 1: dt = dt[0]
            dtype.append((col, dt))
    dtype = numpy.dtype(dtype)

    data = numpy.empty(cat.size, dtype=dtype)
    for col in columns:
        data[col] = cat[col]

    # sort the particles by the specified columns and store the
    # corrected sorted index
    for col in reversed(rankby):
        dt = data.dtype[col]
        rankby_name = col

        # make an integer key for floating columns
        # this assumes the lexical order of floats viewed as integers is consistent.
        if issubclass(dt.type, numpy.float32):
            data['_sortkey'] = numpy.frombuffer(data[col].tobytes(),
                                                dtype='i4')
            if reverse:
                data['_sortkey'] *= -1
            rankby_name = '_sortkey'
        elif issubclass(dt.type, numpy.float64):
            data['_sortkey'] = numpy.frombuffer(data[col].tobytes(),
                                                dtype='i8')
            if reverse:
                data['_sortkey'] *= -1
            rankby_name = '_sortkey'
        elif not issubclass(dt.type, numpy.integer):
            args = (col, str(dt))
            raise ValueError(
                "cannot sort by column '%s' with dtype '%s'; must be integer or floating type"
                % args)

        # do the parallel sort
        mpsort.sort(data, orderby=rankby_name, comm=comm)

    return data[usecols]
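
A quick check of the bit-reinterpretation trick used above. For non-negative IEEE-754 floats the bit patterns, read as same-width integers, are monotone in the float value; mixed signs would break it, which is why the comment calls this an assumption:

import numpy

x = numpy.array([0.5, 3.25, 2.0, 1e-9])           # non-negative float64 data
key = numpy.frombuffer(x.tobytes(), dtype='i8')   # reinterpret the raw bits
assert (numpy.argsort(key) == numpy.argsort(x)).all()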
Example #21
    def PoissonSample(self, delta, parameters_sampling):
        nbar=parameters_sampling['nbar']
        seed1=parameters_sampling['seed1']
        seed2=parameters_sampling['seed2']

        comm = self.pm.comm
        # mean number of objects per cell
        H = self.BoxSize / self.pm.Nmesh
        overallmean = H.prod() * nbar

        # number of objects in each cell (per rank, as a RealField)
        cellmean = delta * overallmean

        # create a random state with the input seed
        rng = MPIRandomState(seed=seed1, comm=comm, size=delta.size)

        # generate Poisson samples. Note that we use ravel/unravel to
        # maintain MPI invariance.
        Nravel = rng.poisson(lam=cellmean.ravel())
        N = self.pm.create(type='real')
        N.unravel(Nravel)

        Ntot = N.csum()
        if self.log.isEnabledFor(logging.INFO):
            self.log.info('Poisson sampling done, total number of objects is {}'.format(Ntot))

        pos_mesh = self.pm.generate_uniform_particle_grid(shift=0.0)
        disp_mesh = np.empty_like(pos_mesh)

        # no need to do decompose because pos_mesh is strictly within the
        # local volume of the RealField.
        N_per_cell = N.readout(pos_mesh, resampler='nnb')
        for i in range(N.ndim):
            disp_mesh[:, i] = self.displacement[i].readout(pos_mesh, resampler='nnb')

        # fight round off errors, if any
        N_per_cell = np.int64(N_per_cell + 0.5)

        pos = pos_mesh.repeat(N_per_cell, axis=0)
        disp = disp_mesh.repeat(N_per_cell, axis=0)

        del pos_mesh
        del disp_mesh

        if self.log.isEnabledFor(logging.INFO):
            self.log.info("Catalog produced. Assigning in cell shift.")

        # FIXME: after pmesh update, remove this
        orderby = np.int64(pos[:, 0] / H[0] + 0.5)
        for i in range(1, delta.ndim):
            orderby[...] *= self.pm.Nmesh[i]
            orderby[...] += np.int64(pos[:, i] / H[i] + 0.5)

        # sort by ID to maintain MPI invariance.
        pos = mpsort.sort(pos, orderby=orderby, comm=comm)
        disp = mpsort.sort(disp, orderby=orderby, comm=comm)

        if self.log.isEnabledFor(logging.INFO):
            self.log.info("Sorting done")

        rng_shift = MPIRandomState(seed=seed2, comm=comm, size=len(pos))
        in_cell_shift = rng_shift.uniform(0, H[i], itemshape=(delta.ndim,))

        pos[...] += in_cell_shift
        pos[...] %= self.pm.BoxSize

        if self.log.isEnabledFor(logging.INFO):
            self.log.info("Catalog shifted.")
            
        # Catalog needs to be shifted in the z-coordinate, so that pos and
        # the comoving distance match.
        pos[..., 0] += (self.comoving_distance - self.width) * np.ones(pos.shape[0])

        return pos, disp
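
The hand-rolled `orderby` loop above computes the C-order linear cell index of each particle. A hedged equivalent using `numpy.ravel_multi_index`, untested against the original and assuming positions sit on the H-spaced grid (`pos`, `H`, and `self.pm` come from the method above):

import numpy as np

cell = np.int64(pos / H + 0.5)                       # (N, ndim) cell coordinates
orderby = np.ravel_multi_index(cell.T, self.pm.Nmesh)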
Example #22
def main():
    comm = MPI.COMM_WORLD
    SNAP, LABEL = None, None
    if comm.rank == 0:
        SNAP = files.Snapshot(ns.snapfilename, files.TPMSnapshotFile)
        LABEL = files.Snapshot(ns.halolabel, files.HaloLabelFile)

    SNAP = comm.bcast(SNAP)
    LABEL = comm.bcast(LABEL)

    Ntot = sum(SNAP.npart)
    assert Ntot == sum(LABEL.npart)

    h = files.HaloFile(ns.halocatalogue)

    N = h.read_mass()

    N0 = Ntot - sum(N[1:])
    # halos are assigned to ranks 0, 1, 2, 3 ...
    halorank = numpy.arange(len(N)) % comm.size
    # but non halos are special we will fix it later.
    halorank[0] = -1

    NonhaloStart = comm.rank * int(N0) // comm.size
    NonhaloEnd = (comm.rank + 1) * int(N0) // comm.size

    myNtotal = numpy.sum(N[halorank == comm.rank],
                         dtype='i8') + (NonhaloEnd - NonhaloStart)

    print("Rank %d NonhaloStart %d NonhaloEnd %d myNtotal %d" %
          (comm.rank, NonhaloStart, NonhaloEnd, myNtotal))

    data = numpy.empty(myNtotal,
                       dtype=[
                           ('Position', ('f4', 3)),
                           ('Label', ('i4')),
                           ('Rank', ('i4')),
                       ])

    allNtotal = comm.allgather(myNtotal)
    start = sum(allNtotal[:comm.rank])
    end = sum(allNtotal[:comm.rank + 1])
    data['Position'] = SNAP.read("Position", start, end)
    data['Label'] = LABEL.read("Label", start, end)
    data['Rank'] = halorank[data['Label']]
    # now assign ranks to nonhalo particles
    nonhalomask = (data['Label'] == 0)

    nonhalocount = comm.allgather(nonhalomask.sum())

    data['Rank'][nonhalomask] = (sum(nonhalocount[:comm.rank]) +
                                 numpy.arange(nonhalomask.sum())) % comm.size

    mpsort.sort(data, orderby='Rank')

    arg = data['Label'].argsort()
    data = data[arg]

    ul = numpy.unique(data['Label'])

    bins = correlate.RBinning(40. / ns.boxsize, Nbins=ns.Nmesh)
    sum1 = numpy.zeros(len(bins.centers))

    for l in ul:
        if l == 0: continue
        start = data['Label'].searchsorted(l, side='left')
        end = data['Label'].searchsorted(l, side='right')
        pos = data['Position'][start:end]
        dataset = correlate.points(pos, boxsize=1.0)
        result = correlate.paircount(dataset, dataset, bins, np=0)
        sum1 += result.sum1
        if l % 1000 == 0:
            print(l)

    sum1 = comm.allreduce(sum1, MPI.SUM)
    Ntot = sum(SNAP.npart)
    RR = 4. / 3 * numpy.pi * numpy.diff(bins.edges**3) * (1.0 * Ntot * Ntot)

    k = numpy.arange(ns.Nmesh // 2) * 2 * numpy.pi / ns.boxsize
    # asymptotically zero at large r. The mean doesn't matter, as
    # we don't use the zero-k mode anyway.
    k, p = corrfrompower(bins.centers * ns.boxsize, sum1 / RR, R=k)
    # inverse FT factor
    p *= (2 * numpy.pi)**3

    if comm.rank == 0:

        if ns.output != '-':
            ff = open(ns.output, 'w')
            ff2 = open(ns.output + '.xi', 'w')
            with ff2:
                numpy.savetxt(ff2, list(zip(bins.centers, sum1 / RR - 1.0)))
        else:
            ff = stdout
        with ff:
            #        numpy.savetxt(ff, zip(bins.centers, sum1 / RR - 1.0))
            numpy.savetxt(ff, list(zip(k, p)))
Example #23
def cgm(comm, data, domain, rperp, rpar, los, boxsize):
    """
    Perform the cylindrical grouping method

    This outputs a structured array with the same length as the input data
    with the following fields for each object in the original data:

    #. cgm_type :
        a flag specifying the type for each object,
        with 0 specifying CGM central and 1 denoting CGM satellite
    #. cgm_haloid :
        The index of the CGM object this object belongs to; an integer
        between 0 and the total number of CGM halos
    #. num_cgm_sats :
        The number of satellites in the CGM halo

    Parameters
    ----------
    comm :
        the MPI communicator
    data : CatalogSource
        catalog with sorted input data, including Position
    domain :
        the domain decomposition
    rperp, rpar : float
        the maximum distances to group objects together in the directions
        perpendicular and parallel to the line-of-sight; the cylinder
        has radius ``rperp`` and height ``2 * rpar``
    los :
        the line-of-sight vector
    boxsize :
        the boxsize, or ``None`` if not using periodic boundary conditions
    """
    # whether we do periodic boundary conditions
    periodic = boxsize is not None
    flat_sky = los is not None

    # the maximum distance still inside the cylinder set by rperp,rpar
    rperp2 = rperp**2
    rpar2 = rpar**2
    rmax = (rperp2 + rpar2)**0.5

    pos0, origind0, sortindex0 = data.compute(data['Position'],
                                              data['origind'],
                                              data['sortindex'])

    layout1 = domain.decompose(pos0, smoothing=0)
    pos1 = layout1.exchange(pos0)
    origind1 = layout1.exchange(origind0)
    sortindex1 = layout1.exchange(sortindex0)

    # exchange particles across ranks, accounting for smoothing radius
    layout2 = domain.decompose(pos1, smoothing=rmax)
    pos2 = layout2.exchange(pos1)
    origind2 = layout2.exchange(origind1)
    sortindex2 = layout2.exchange(sortindex1)
    startrank = layout2.exchange(numpy.ones(len(pos1), dtype='i4') * comm.rank)

    # make the KD-tree
    tree1 = kdcount.KDTree(pos1, boxsize=boxsize).root
    tree2 = kdcount.KDTree(pos2, boxsize=boxsize).root

    dataframe = []
    j_gt_i = numpy.zeros(len(pos1), dtype='f4')
    wrong_rank = numpy.zeros(len(pos1), dtype='f4')

    def callback(r, i, j):

        r1 = pos1[i]
        r2 = pos2[j]
        dr = r1 - r2

        # enforce periodicity in dpos
        if periodic:
            for axis, col in enumerate(dr.T):
                col[col > boxsize[axis] * 0.5] -= boxsize[axis]
                col[col <= -boxsize[axis] * 0.5] += boxsize[axis]

        # los distance
        if flat_sky:
            rlos2 = numpy.einsum("ij,j->i", dr, los)**2
        else:
            center = 0.5 * (r1 + r2)
            dot2 = numpy.einsum('ij, ij->i', dr, center)**2
            center2 = numpy.einsum('ij, ij->i', center, center)
            rlos2 = dot2 / center2

        # sky
        dr2 = numpy.einsum('ij, ij->i', dr, dr)
        rsky2 = numpy.abs(dr2 - rlos2)

        # save the valid pairs
        # To Be Valid: pairs must be within cylinder (compare rperp and rpar)
        valid = (rsky2 <= rperp2) & (rlos2 <= rpar2)
        i = i[valid]
        j = j[valid]

        # the correctly sorted indices of particles
        sort_i = sortindex1[i]
        sort_j = sortindex2[j]

        # the rank where the j object lives
        rank_j = startrank[j]

        # track pairs where sorted j > sorted i
        weights = numpy.where(sort_i < sort_j, 1, 0)
        j_gt_i[:] += numpy.bincount(i, weights=weights, minlength=len(pos1))

        # track pairs where j rank is wrong
        weights *= numpy.where(rank_j != comm.rank, 1, 0)
        wrong_rank[:] += numpy.bincount(i,
                                        weights=weights,
                                        minlength=len(pos1))

        # save the valid pairs for final calculations
        res = numpy.vstack([i, j, sort_i, sort_j]).T
        dataframe.append(res)

    # add all the valid pairs to a dataframe
    tree1.enum(tree2, rmax, process=callback)

    # sorted indices of objects that are centrals
    # (objects with no pairs with j > i)
    centrals = set(sortindex1[(j_gt_i == 0)])

    # sorted indices of objects that might be centrals
    # (pairs with j>i that live on other ranks)
    maybes = set(sortindex1[(wrong_rank > 0)])

    # store the pairs in a pandas dataframe for fast groupby
    dataframe = numpy.concatenate(dataframe, axis=0)
    df = pd.DataFrame(dataframe, columns=['i', 'j', 'sort_i', 'sort_j'])

    # we sort by the correct sorted index in descending order which puts
    # highest priority objects first
    df.sort_values("sort_i", ascending=False, inplace=True)

    # index by the correct sorted order
    df.set_index('sort_i', inplace=True)

    # to find centrals, considers objects that could be satellites of another
    # (pairs with sort_j > sort_i)
    possible_cens = df[(df['sort_j'] > df.index.values)]
    possible_cens = possible_cens.drop(centrals, errors='ignore')
    _remove_objects_paired_with(possible_cens,
                                centrals)  # remove objs paired with cens

    # sorted indices of objects that have pairs on other ranks
    # these objects are already "maybe" centrals
    on_other_ranks = sortindex1[(wrong_rank > 0)]

    # find the centrals and associated halo labels for each central
    all_centrals, labels = _find_centrals(comm, possible_cens, on_other_ranks,
                                          centrals, maybes)

    # reset the index and return
    df.reset_index(inplace=True)

    # add the halo labels for each pair in the dataframe
    labels = pd.Series(labels,
                       name='label_i',
                       index=pd.Index(all_centrals, name='sort_i'))
    df = df.join(labels, on='sort_i')
    labels.name = 'label_j'
    labels.index.name = 'sort_j'
    df = df.join(labels, on='sort_j')

    # initialize the output arrays
    labels = numpy.zeros(len(pos1), dtype='i8') - 1  # indexed by i
    types = numpy.zeros(len(pos1), dtype='u4')  # indexed by i
    counts = numpy.zeros(len(pos2), dtype='i8')  # indexed by j

    # assign labels of the centrals
    cens = df.dropna(subset=['label_j']).drop_duplicates('i')
    labels[cens['i'].values] = cens['label_i'].values

    # objects on this rank that are satellites
    # (no label for the 1st object in pair but a label for the 2nd object)
    sats = (df['label_i'].isnull()) & (~df['label_j'].isnull())
    df = df[sats]

    # find the corresponding central for each satellite
    df = df.sort_values('sort_j', ascending=False)
    df.set_index('sort_i', inplace=True)
    sats_grouped = df.groupby('sort_i', sort=False, as_index=False)
    centrals = sats_grouped.first(
    )  # these are the centrals for each satellite

    # update the satellite info with its pair with the highest priority
    cens_i = centrals['i'].values
    cens_j = centrals['j'].values
    counts += numpy.bincount(cens_j, minlength=len(pos2))
    types[cens_i] = 1
    labels[cens_i] = centrals['label_j'].values

    # sum counts across ranks (take the sum of any repeated objects)
    counts = layout2.gather(counts, mode='sum')

    # output fields
    dtype = numpy.dtype([('cgm_haloid', 'i8'), ('num_cgm_sats', 'i8'),
                         ('cgm_type', 'u4'), ('origind', 'u4')])
    out = numpy.empty(len(data), dtype=dtype)

    # gather the data back onto the original ranks
    # no ghosts for this domain layout so choose any particle
    out['cgm_haloid'] = layout1.gather(labels, mode='any')
    out['origind'] = layout1.gather(origind1, mode='any')
    out['num_cgm_sats'] = layout1.gather(counts, mode='any')
    out['cgm_type'] = layout1.gather(types, mode='any')

    # restore the original order
    mpsort.sort(out, orderby='origind', comm=comm)

    fields = ['cgm_type', 'cgm_haloid', 'num_cgm_sats']
    return out[fields]
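
The flat-sky branch of `callback` splits each pair's squared separation into a line-of-sight part and a transverse part. A single-pair check of the decomposition:

import numpy

los = numpy.array([0., 0., 1.])
dr = numpy.array([[3., 0., 4.]])                # one pair separation
rlos2 = numpy.einsum("ij,j->i", dr, los) ** 2   # 16.0, along the line of sight
dr2 = numpy.einsum('ij, ij->i', dr, dr)         # 25.0, total squared distance
rsky2 = numpy.abs(dr2 - rlos2)                  # 9.0, transverse part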
Example #24
    def _assign_fibers(self, Label):
        """
        Internal function to divide the data by collision group
        across ranks and assign fibers, such that the minimum
        number of objects is collided out of the survey
        """
        import mpsort
        from mpi4py import MPI
        
        comm = self.comm
        mask = Label != 0
        PIG = numpy.empty(mask.sum(), dtype=[
                ('Position', ('f4', 3)),  
                ('Label', ('i4')), 
                ('Rank', ('i4')), 
                ('Index', ('i4')),
                ('Collided', ('i4')),
                ('NeighborID', ('i4'))
                ])
        PIG['Label'] = Label[mask]
        size = len(Label)
        size = comm.allgather(size)
        Ntot = sum(size)
        offset = sum(size[:comm.rank])
        PIG['Index'] = offset + numpy.nonzero(mask)[0]
        del Label
        
        with self.datasource.open() as stream:
            [[Position]] = stream.read(['Position'], full=True)
        PIG['Position'] = Position[mask]
        del Position
        Ntot = comm.allreduce(len(mask))
        Nhalo = comm.allreduce(
            PIG['Label'].max() if len(PIG['Label']) > 0 else 0, op=MPI.MAX) + 1

        # now count number of particles per halo
        PIG['Rank'] = PIG['Label'] % comm.size
        cnt = numpy.bincount(PIG['Rank'], minlength=comm.size)
        Nlocal = comm.allreduce(cnt)[comm.rank]

        # sort by rank and then label
        PIG2 = numpy.empty(Nlocal, PIG.dtype)
        mpsort.sort(PIG, orderby='Rank', out=PIG2, comm=self.comm)
        assert (PIG2['Rank'] == comm.rank).all()
        PIG2.sort(order=['Label'])
        
        if self.comm.rank == 0:
            self.logger.info('total number of collision groups = %d', Nhalo-1)
            self.logger.info("Started fiber assignment")

        # loop over unique group ids
        for group_id in numpy.unique(PIG2['Label']):
            start = PIG2['Label'].searchsorted(group_id, side='left')
            end = PIG2['Label'].searchsorted(group_id, side='right')
            N = end-start
            assert(PIG2['Label'][start:end] == group_id).all()
            
            # pairs (random selection)
            if N == 2:
                
                # randomly choose, with fixed local seed
                which = numpy.random.choice([0,1])
                    
                indices = [start+which, start+(which^1)]
                PIG2['Collided'][indices] = [1, 0]
                PIG2['NeighborID'][indices] = [PIG2['Index'][start+(which^1)], -1]
            # multiplets (minimize collidedness)
            elif N > 2:
                collided, nearest = self._assign_multiplets(PIG2['Position'][start:end])
                PIG2['Collided'][start:end] = collided[:]
                PIG2['NeighborID'][start:end] = -1
                PIG2['NeighborID'][start:end][collided==1] = PIG2['Index'][start+nearest][:]

        if self.comm.rank == 0: self.logger.info("Finished fiber assignment")
    
        # return to the order specified by the global unique index
        mpsort.sort(PIG2, orderby='Index', out=PIG, comm=self.comm)
        
        # return arrays including the objects not in any groups
        collided = numpy.zeros(size[comm.rank], dtype='i4')
        collided[mask] = PIG['Collided'][:]
        neighbors = numpy.zeros(size[comm.rank], dtype='i4') - 1
        neighbors[mask] = PIG['NeighborID'][:]

        del PIG
        return collided, neighbors
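
The `Rank = Label % comm.size` assignment plus a sort by `Rank` is the recurring trick in these examples for gathering every member of a group onto a single rank. A distilled sketch of that routing step:

from mpi4py import MPI
import numpy
import mpsort

comm = MPI.COMM_WORLD

data = numpy.empty(5, dtype=[('Label', 'i4'), ('Rank', 'i4')])
data['Label'] = (numpy.arange(5) * (comm.rank + 1)) % 7   # arbitrary group ids
data['Rank'] = data['Label'] % comm.size                  # owner of each group

cnt = numpy.bincount(data['Rank'], minlength=comm.size)
nlocal = comm.allreduce(cnt)[comm.rank]        # rows this rank will receive
out = numpy.empty(nlocal, data.dtype)
mpsort.sort(data, orderby='Rank', out=out, comm=comm)
assert (out['Rank'] == comm.rank).all()        # whole groups are now rank-local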
Example #25
def poisson_sample_to_points(delta,
                             displacement,
                             pm,
                             nbar,
                             bias=1.,
                             seed=None,
                             logger=None):
    """
    Poisson sample the linear delta and displacement fields to points.

    The steps in this function:

    #.  Apply a biased, lognormal transformation to the input ``delta`` field
    #.  Poisson sample the overdensity field to discrete points
    #.  Distribute the positions of particles uniformly within the mesh cells,
        and assign the displacement field at each cell to the particles

    Parameters
    ----------
    delta : RealField
        the linear overdensity field to sample
    displacement : list of RealField (3,)
        the linear displacement fields which are used to move the particles
    nbar : float
        the desired number density of the output catalog of objects
    bias : float, optional
        apply a linear bias to the overdensity field (default is 1.)
    seed : int, optional
        the random seed used to Poisson sample the field to points

    Returns
    -------
    pos : array_like, (N, 3)
        the Cartesian positions of each of the generated particles
    displ : array_like, (N, 3)
        the displacement field sampled for each of the generated particles in the
        same units as the ``pos`` array
    """
    comm = delta.pm.comm

    # seed1 used for poisson sampling
    # seed2 used for uniform shift within a cell.
    seed1, seed2 = numpy.random.RandomState(seed).randint(0, 0xfffffff, size=2)

    # apply the lognormal transformation to the initial conditions density
    # this creates a positive-definite delta (necessary for Poisson sampling)
    lagrangian_bias = bias - 1.
    delta = lognormal_transform(delta, bias=lagrangian_bias)

    if logger and pm.comm.rank == 0:
        logger.info("Lognormal transformation done")

    # mean number of objects per cell
    H = delta.BoxSize / delta.Nmesh
    overallmean = H.prod() * nbar

    # number of objects in each cell (per rank, as a RealField)
    cellmean = delta * overallmean

    # create a random state with the input seed
    rng = MPIRandomState(seed=seed1, comm=comm, size=delta.size)

    # generate Poisson samples. Note that we use ravel/unravel to
    # maintain MPI invariance.
    Nravel = rng.poisson(lam=cellmean.ravel())
    N = delta.pm.create(mode='real')
    N.unravel(Nravel)

    Ntot = N.csum()
    if logger and pm.comm.rank == 0:
        logger.info("Poisson sampling done, total number of objects is %d" %
                    Ntot)

    pos_mesh = delta.pm.generate_uniform_particle_grid(shift=0.0)
    disp_mesh = numpy.empty_like(pos_mesh)

    # no need to do decompose because pos_mesh is strictly within the
    # local volume of the RealField.
    N_per_cell = N.readout(pos_mesh, resampler='nnb')
    for i in range(N.ndim):
        disp_mesh[:, i] = displacement[i].readout(pos_mesh, resampler='nnb')

    # fight round off errors, if any
    N_per_cell = numpy.int64(N_per_cell + 0.5)

    pos = pos_mesh.repeat(N_per_cell, axis=0)
    disp = disp_mesh.repeat(N_per_cell, axis=0)

    del pos_mesh
    del disp_mesh

    if logger and pm.comm.rank == 0:
        logger.info("catalog produced. Assigning in cell shift.")

    # generate linear ordering of the positions.
    # this should have been a method in pmesh, e.g. argument
    # to generate_uniform_particle_grid(return_id=True);

    # FIXME: after pmesh update, remove this
    orderby = numpy.int64(pos[:, 0] / H[0] + 0.5)
    for i in range(1, delta.ndim):
        orderby[...] *= delta.Nmesh[i]
        orderby[...] += numpy.int64(pos[:, i] / H[i] + 0.5)

    # sort by ID to maintain MPI invariance.
    pos = mpsort.sort(pos, orderby=orderby, comm=comm)
    disp = mpsort.sort(disp, orderby=orderby, comm=comm)

    if logger and pm.comm.rank == 0:
        logger.info("sorting done")

    rng_shift = MPIRandomState(seed=seed2, comm=comm, size=len(pos))
    in_cell_shift = rng_shift.uniform(0, H[i], itemshape=(delta.ndim, ))

    pos[...] += in_cell_shift
    pos[...] %= delta.BoxSize

    if logger and pm.comm.rank == 0:
        logger.info("catalog shifted.")

    return pos, disp
Example #26
    def run(self):
        """
        Run the Subsample algorithm
        """
        import mpsort
        from astropy.utils.misc import NumpyRNGContext

        if self.smoothing is None:
            self.smoothing = self.datasource.BoxSize[0] / self.Nmesh[0]
        elif (self.datasource.BoxSize / self.Nmesh > self.smoothing).any():
            raise ValueError("smoothing is too small")

        def Smoothing(pm, complex):
            # multiply by a separable Gaussian kernel; the per-axis product
            # of exp(-0.5 * ki**2 * R**2) equals exp(-0.5 * |k|**2 * R**2)
            for ki in pm.k:
                complex[:] *= numpy.exp(-0.5 * ki ** 2 * self.smoothing ** 2)

        def NormalizeDC(pm, complex):
            """ removes the DC amplitude. This effectively
                divides by the mean
            """
            w = pm.w
            comm = pm.comm
            ind = []
            value = 0.0
            found = True
            for wi in w:
                if (wi != 0).all():
                    found = False
                    break
                ind.append((wi == 0).nonzero()[0][0])
            if found:
                ind = tuple(ind)
                value = numpy.abs(complex[ind])
            value = comm.allreduce(value)
            complex[:] /= value

        # open the datasource and keep the cache
        with self.datasource.keep_cache():

            painter = Painter.create("DefaultPainter", paintbrush="cic")
            real, stats = painter.paint(self.pm, self.datasource)
            complex = real.r2c()

            for t in [Smoothing, NormalizeDC]:
                t(self.pm, complex)

            complex.c2r(real)

            columns = ["Position", "ID", "Velocity"]
            local_seed = utils.local_random_seed(self.seed, self.comm)

            dtype = numpy.dtype([("Position", ("f4", 3)), ("Velocity", ("f4", 3)), ("ID", "u8"), ("Density", "f4")])

            subsample = [numpy.empty(0, dtype=dtype)]

            with self.datasource.open() as stream:
                for Position, ID, Velocity in stream.read(columns):

                    with NumpyRNGContext(local_seed):
                        u = numpy.random.uniform(size=len(ID))
                    keep = u < self.ratio
                    Nkeep = keep.sum()
                    if Nkeep == 0:
                        continue
                    data = numpy.empty(Nkeep, dtype=dtype)
                    data["Position"][:] = Position[keep]
                    data["Velocity"][:] = Velocity[keep]
                    data["ID"][:] = ID[keep]

                    layout = self.pm.decompose(data["Position"])
                    pos1 = layout.exchange(data["Position"])
                    density1 = real.readout(pos1)
                    density = layout.gather(density1)

                    # normalize the position after reading out density!
                    data["Position"][:] /= self.datasource.BoxSize
                    data["Velocity"][:] /= self.datasource.BoxSize
                    data["Density"][:] = density
                    subsample.append(data)

        subsample = numpy.concatenate(subsample)
        mpsort.sort(subsample, orderby="ID", comm=self.comm)

        return subsample
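
Sorting the subsample globally by ID at the end makes the output deterministic regardless of how particles were distributed when the random thinning happened. A distilled sketch of that pattern (the per-rank seed here is a stand-in for `utils.local_random_seed`):

from mpi4py import MPI
import numpy
import mpsort

comm = MPI.COMM_WORLD

rng = numpy.random.RandomState(1234 + comm.rank)       # per-rank local seed
ID = comm.rank + comm.size * numpy.arange(8, dtype='u8')
keep = rng.uniform(size=len(ID)) < 0.5                 # random thinning
sub = ID[keep].copy()

mpsort.sort(sub, orderby=sub, comm=comm)               # deterministic global order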
Example #27
    def run(self):
        """
        Run the FOF6D Algorithm
        """
        import mpsort
        from mpi4py import MPI
        
        comm = self.comm
        offset = 0
        
        with self.halolabel.open() as stream:
            [[Label]] = stream.read(['Label'], full=True)
        mask = Label != 0
        PIG = numpy.empty(mask.sum(), dtype=[
                ('Position', ('f4', 3)), 
                ('Velocity', ('f4', 3)), 
                ('Label', ('i4')), 
                ('Rank', ('i4')), 
                ])
        PIG['Label'] = Label[mask]
        del Label
        with self.datasource.open() as stream:
            [[Position]] = stream.read(['Position'], full=True)
            PIG['Position'] = Position[mask]
            del Position
            [[Velocity]] = stream.read(['Velocity'], full=True)
            PIG['Velocity'] = Velocity[mask]
            del Velocity
     
        Ntot = comm.allreduce(len(mask))
        del mask

        Nhalo = comm.allreduce(
            PIG['Label'].max() if len(PIG['Label']) > 0 else 0, op=MPI.MAX) + 1

        # now count number of particles per halo
        PIG['Rank'] = PIG['Label'] % comm.size

        Nlocal = comm.allreduce(
                    numpy.bincount(PIG['Rank'], minlength=comm.size)
                 )[comm.rank]

        PIG2 = numpy.empty(Nlocal, PIG.dtype)

        mpsort.sort(PIG, orderby='Rank', out=PIG2, comm=self.comm)
        del PIG

        assert (PIG2['Rank'] == comm.rank).all()

        PIG2.sort(order=['Label'])

        self.logger.info('halos = %d', Nhalo)
        cat = []
        for haloid in numpy.unique(PIG2['Label']):
            hstart = PIG2['Label'].searchsorted(haloid, side='left')
            hend = PIG2['Label'].searchsorted(haloid, side='right')
            if hend - hstart < self.nmin: continue
            assert(PIG2['Label'][hstart:hend] == haloid).all()
            cat.append(
                subfof(
                    PIG2['Position'][hstart:hend], 
                    PIG2['Velocity'][hstart:hend], 
                    self.linklength * (self.datasource.BoxSize.prod() / Ntot) ** 0.3333, 
                    self.vfactor, haloid, Ntot, self.datasource.BoxSize))
        cat = numpy.concatenate(cat, axis=0)
        return cat, Ntot
Example #28
File: cgm.py Project: bccp/nbodykit
def cgm(comm, data, domain, rperp, rpar, los, boxsize):
    """
    Perform the cylindrical grouping method

    This outputs a structured array with the same length as the input data
    with the following fields for each object in the original data:

    #. cgm_type :
        a flag specifying the type for each object,
        with 0 specifying CGM central and 1 denoting CGM satellite
    #. cgm_haloid :
        The index of the CGM object this object belongs to; an integer
        between 0 and the total number of CGM halos
    #. num_cgm_sats :
        The number of satellites in the CGM halo

    Parameters
    ----------
    comm :
        the MPI communicator
    data : CatalogSource
        catalog with sorted input data, including Position
    domain :
        the domain decomposition
    rperp, rpar : float
        the maximum distances to group objects together in the directions
        perpendicular and parallel to the line-of-sight; the cylinder
        has radius ``rperp`` and height ``2 * rpar``
    los :
        the line-of-sight vector
    boxsize :
        the boxsize, or ``None`` if not using periodic boundary conditions
    """
    # whether we do periodic boundary conditions
    periodic = boxsize is not None
    flat_sky = los is not None

    # the maximum distance still inside the cylinder set by rperp,rpar
    rperp2 = rperp**2; rpar2 = rpar**2
    rmax = (rperp2 + rpar2)**0.5

    pos0, origind0, sortindex0 = data.compute(data['Position'], data['origind'], data['sortindex'])

    layout1    = domain.decompose(pos0, smoothing=0)
    pos1       = layout1.exchange(pos0)
    origind1   = layout1.exchange(origind0)
    sortindex1 = layout1.exchange(sortindex0)

    # exchange particles across ranks, accounting for smoothing radius
    layout2    = domain.decompose(pos1, smoothing=rmax)
    pos2       = layout2.exchange(pos1)
    origind2   = layout2.exchange(origind1)
    sortindex2 = layout2.exchange(sortindex1)
    startrank  = layout2.exchange(numpy.ones(len(pos1), dtype='i4')*comm.rank)

    # make the KD-tree
    tree1 = kdcount.KDTree(pos1, boxsize=boxsize).root
    tree2 = kdcount.KDTree(pos2, boxsize=boxsize).root

    dataframe = []
    j_gt_i = numpy.zeros(len(pos1), dtype='f4')
    wrong_rank = numpy.zeros(len(pos1), dtype='f4')

    def callback(r, i, j):

        r1 = pos1[i]
        r2 = pos2[j]
        dr = r1 - r2

        # enforce periodicity in dpos
        if periodic:
            for axis, col in enumerate(dr.T):
                col[col > boxsize[axis]*0.5] -= boxsize[axis]
                col[col <= -boxsize[axis]*0.5] += boxsize[axis]

        # los distance
        if flat_sky:
            rlos2 =  numpy.einsum("ij,j->i", dr, los)**2
        else:
            center = 0.5 * (r1 + r2)
            dot2 = numpy.einsum('ij, ij->i', dr, center)**2
            center2 = numpy.einsum('ij, ij->i', center, center)
            rlos2 = dot2 / center2

        # sky
        dr2 = numpy.einsum('ij, ij->i', dr, dr)
        rsky2 = numpy.abs(dr2 - rlos2)

        # save the valid pairs
        # To Be Valid: pairs must be within cylinder (compare rperp and rpar)
        valid = (rsky2 <= rperp2)&(rlos2 <= rpar2)
        i = i[valid]; j = j[valid];

        # the correctly sorted indices of particles
        sort_i = sortindex1[i]
        sort_j = sortindex2[j]

        # the rank where the j object lives
        rank_j = startrank[j]

        # track pairs where sorted j > sorted i
        weights = numpy.where(sort_i < sort_j, 1, 0)
        j_gt_i[:] += numpy.bincount(i, weights=weights, minlength=len(pos1))

        # track pairs where j rank is wrong
        weights *= numpy.where(rank_j != comm.rank, 1, 0)
        wrong_rank[:] += numpy.bincount(i, weights=weights, minlength=len(pos1))

        # save the valid pairs for final calculations
        res = numpy.vstack([i, j, sort_i, sort_j]).T
        dataframe.append(res)

    # add all the valid pairs to a dataframe
    tree1.enum(tree2, rmax, process=callback)

    # sorted indices of objects that are centrals
    # (objects with no pairs with j > i)
    centrals = set(sortindex1[(j_gt_i==0)])

    # sorted indices of objects that might be centrals
    # (pairs with j>i that live on other ranks)
    maybes = set(sortindex1[(wrong_rank>0)])

    # store the pairs in a pandas dataframe for fast groupby
    dataframe = numpy.concatenate(dataframe, axis=0)
    df = pd.DataFrame(dataframe, columns=['i', 'j', 'sort_i', 'sort_j'])

    # we sort by the correct sorted index in descending order which puts
    # highest priority objects first
    df.sort_values("sort_i", ascending=False, inplace=True)

    # index by the correct sorted order
    df.set_index('sort_i', inplace=True)

    # to find centrals, consider objects that could be satellites of another
    # (pairs with sort_j > sort_i)
    possible_cens = df[(df['sort_j']>df.index.values)]
    possible_cens = possible_cens.drop(centrals, errors='ignore')
    _remove_objects_paired_with(possible_cens, centrals) # remove objs paired with cens

    # sorted indices of objects that have pairs on other ranks
    # these objects are already "maybe" centrals
    on_other_ranks = sortindex1[(wrong_rank>0)]

    # find the centrals and associated halo labels for each central
    all_centrals, labels = _find_centrals(comm, possible_cens, on_other_ranks, centrals, maybes)

    # reset the index and return
    df.reset_index(inplace=True)

    # add the halo labels for each pair in the dataframe
    labels = pd.Series(labels, name='label_i', index=pd.Index(all_centrals, name='sort_i'))
    df = df.join(labels, on='sort_i')
    labels.name = 'label_j'; labels.index.name = 'sort_j'
    df = df.join(labels, on='sort_j')

    # initialize the output arrays
    labels = numpy.zeros(len(pos1), dtype='i8') - 1 # indexed by i
    types = numpy.zeros(len(pos1), dtype='u4') # indexed by i
    counts = numpy.zeros(len(pos2), dtype='i8') # indexed by j

    # assign labels of the centrals
    cens = df.dropna(subset=['label_j']).drop_duplicates('i')
    labels[cens['i'].values] = cens['label_i'].values

    # objects on this rank that are satellites
    # (no label for the 1st object in pair but a label for the 2nd object)
    sats = (df['label_i'].isnull())&(~df['label_j'].isnull())
    df = df[sats]

    # find the corresponding central for each satellite
    df = df.sort_values('sort_j', ascending=False)
    df.set_index('sort_i', inplace=True)
    sats_grouped = df.groupby('sort_i', sort=False, as_index=False)
    centrals = sats_grouped.first() # these are the centrals for each satellite

    # update the satellite info with its pair with the highest priority
    cens_i = centrals['i'].values; cens_j = centrals['j'].values
    counts += numpy.bincount(cens_j, minlength=len(pos2))
    types[cens_i] = 1
    labels[cens_i] = centrals['label_j'].values

    # sum counts across ranks (take the sum of any repeated objects)
    counts = layout2.gather(counts, mode='sum')

    # output fields
    dtype = numpy.dtype([('cgm_haloid', 'i8'),
                         ('num_cgm_sats', 'i8'),
                         ('cgm_type', 'u4'),
                         ('origind', 'u4')])
    out = numpy.empty(len(data), dtype=dtype)

    # gather the data back onto the original ranks
    # no ghosts for this domain layout so choose any particle
    out['cgm_haloid'] = layout1.gather(labels, mode='any')
    out['origind'] = layout1.gather(origind1, mode='any')
    out['num_cgm_sats'] = layout1.gather(counts, mode='any')
    out['cgm_type'] = layout1.gather(types, mode='any')

    # restore the original order
    mpsort.sort(out, orderby='origind', comm=comm)

    fields = ['cgm_type', 'cgm_haloid', 'num_cgm_sats']
    return out[fields]
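
The closing `mpsort.sort(out, orderby='origind', comm=comm)` is the idiom that undoes the domain decomposition: each row carries its original global index, and a parallel sort on that index returns the catalog to input order with local sizes preserved. A minimal self-contained sketch of the same pattern (the array contents are hypothetical):

import numpy
import mpsort
from mpi4py import MPI

comm = MPI.COMM_WORLD
n = 4

# hypothetical per-rank results, tagged with a locally scrambled global index
out = numpy.empty(n, dtype=[('value', 'f8'), ('origind', 'u4')])
out['origind'] = (comm.rank * n + numpy.arange(n))[::-1]
out['value'] = 0.5 * out['origind']

# a global sort on the tag restores the original ordering
mpsort.sort(out, orderby='origind', comm=comm)
assert (out['origind'] == comm.rank * n + numpy.arange(n)).all()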
Exemplo n.º 39
0
def main():
    comm = MPI.COMM_WORLD
    LABEL = None
    if comm.rank == 0:
        LABEL = files.Snapshot(ns.halolabel, files.HaloLabelFile)

    LABEL = comm.bcast(LABEL)
 
    offset = 0
    
    PIG = []
    for Position, Velocity in \
            ns.datasource.read(['Position', 'Velocity'], comm, bunchsize=4*1024*1024):

        mystart = offset + sum(comm.allgather(len(Position))[:comm.rank])
        myend = mystart + len(Position)
        label = LABEL.read("Label", mystart, myend)
        offset += comm.allreduce(len(Position))
        mask = label != 0 
        mydata = numpy.empty(mask.sum(), dtype=[
                ('Position', ('f4', 3)), 
                ('Velocity', ('f4', 3)), 
                ('Label', ('i4')), 
                ('Rank', ('i4')), 
                ])
        mydata['Position'] = Position[mask] / ns.datasource.BoxSize
        mydata['Velocity'] = Velocity[mask] / ns.datasource.BoxSize
        mydata['Label'] = label[mask]
        PIG.append(mydata)
        del mydata
    Ntot = offset

    PIG = numpy.concatenate(PIG, axis=0)

    Nhalo = comm.allreduce(
        PIG['Label'].max() if len(PIG['Label']) > 0 else 0, op=MPI.MAX) + 1

    # route each particle to a rank determined by its halo label
    PIG['Rank'] = PIG['Label'] % comm.size

    Nlocal = comm.allreduce(
                numpy.bincount(PIG['Rank'], minlength=comm.size)
             )[comm.rank]

    PIG2 = numpy.empty(Nlocal, PIG.dtype)

    mpsort.sort(PIG, orderby='Rank', out=PIG2)
    del PIG

    assert (PIG2['Rank'] == comm.rank).all()

    PIG2.sort(order=['Label'])

    logging.info('halos = %d', Nhalo)
    cat = []
    for haloid in numpy.unique(PIG2['Label']):
        hstart = PIG2['Label'].searchsorted(haloid, side='left')
        hend = PIG2['Label'].searchsorted(haloid, side='right')
        if hend - hstart < ns.Nmin: continue
        assert (PIG2['Label'][hstart:hend] == haloid).all()
        print('Halo', haloid)
        cat.append(
            subfof(
                PIG2['Position'][hstart:hend], 
                PIG2['Velocity'][hstart:hend], 
                ns.linklength * (ns.datasource.BoxSize.prod() / Ntot) ** 0.3333, 
                ns.vfactor, haloid, Ntot))

    cat = numpy.concatenate(cat, axis=0)
    cat = comm.gather(cat)

    if comm.rank == 0:
        cat = numpy.concatenate(cat, axis=0)
        print(cat)
        with h5py.File(ns.output, mode='w') as f:
            dataset = f.create_dataset('Subhalos', data=cat)
            dataset.attrs['LinkingLength'] = ns.linklength
            dataset.attrs['VFactor'] = ns.vfactor
            dataset.attrs['Ntot'] = Ntot
            dataset.attrs['BoxSize'] = ns.datasource.BoxSize
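
Worth noting in the example above is the routing trick: setting `Rank = Label % comm.size` and sorting on it, with the `out` array sized from a global `bincount`, moves every particle of a given halo onto a single rank using nothing but a parallel sort. A stripped-down sketch of the trick (the labels are hypothetical):

import numpy
import mpsort
from mpi4py import MPI

comm = MPI.COMM_WORLD

data = numpy.empty(8, dtype=[('Label', 'i4'), ('Rank', 'i4')])
data['Label'] = numpy.arange(8) * (comm.rank + 1) % 13   # hypothetical labels
data['Rank'] = data['Label'] % comm.size                 # destination rank

# each rank must receive exactly the rows destined for it, so size the
# output buffer by the global count of rows assigned to this rank
counts = comm.allreduce(numpy.bincount(data['Rank'], minlength=comm.size))
out = numpy.empty(counts[comm.rank], dtype=data.dtype)

# after the sort, equal Rank values are contiguous and land on their rank
mpsort.sort(data, orderby='Rank', out=out, comm=comm)
assert (out['Rank'] == comm.rank).all()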
Exemplo n.º 40
0
    def run(self):
        """
        Run the Subsample algorithm
        """
        import mpsort
        from astropy.utils.misc import NumpyRNGContext

        if self.smoothing is None:
            self.smoothing = self.datasource.BoxSize[0] / self.Nmesh[0]
        elif (self.datasource.BoxSize / self.Nmesh > self.smoothing).any():
            raise ValueError("smoothing is too small")

        def Smoothing(pm, complex):
            # pm.k holds broadcastable per-axis frequency arrays; multiplying
            # one Gaussian factor per axis builds the full isotropic kernel
            for ki in pm.k:
                complex[:] *= numpy.exp(-0.5 * ki**2 * self.smoothing**2)

        def NormalizeDC(pm, complex):
            """ removes the DC amplitude. This effectively
                divides by the mean
            """
            w = pm.w
            comm = pm.comm
            ind = []
            value = 0.0
            found = True
            for wi in w:
                if (wi != 0).all():
                    found = False
                    break
                ind.append((wi == 0).nonzero()[0][0])
            if found:
                ind = tuple(ind)
                value = numpy.abs(complex[ind])
            value = comm.allreduce(value)
            complex[:] /= value

        # open the datasource and keep the cache
        with self.datasource.keep_cache():

            painter = Painter.create("DefaultPainter", paintbrush='cic')
            real, stats = painter.paint(self.pm, self.datasource)
            complex = real.r2c()

            for t in [Smoothing, NormalizeDC]:
                t(self.pm, complex)

            complex.c2r(real)

            columns = ['Position', 'ID', 'Velocity']
            local_seed = utils.local_random_seed(self.seed, self.comm)

            dtype = numpy.dtype([
                ('Position', ('f4', 3)),
                ('Velocity', ('f4', 3)),
                ('ID', 'u8'),
                ('Density', 'f4'),
            ])

            subsample = [numpy.empty(0, dtype=dtype)]

            with self.datasource.open() as stream:
                for Position, ID, Velocity in stream.read(columns):

                    with NumpyRNGContext(local_seed):
                        u = numpy.random.uniform(size=len(ID))
                    keep = u < self.ratio
                    Nkeep = keep.sum()
                    if Nkeep == 0: continue
                    data = numpy.empty(Nkeep, dtype=dtype)
                    data['Position'][:] = Position[keep]
                    data['Velocity'][:] = Velocity[keep]
                    data['ID'][:] = ID[keep]

                    layout = self.pm.decompose(data['Position'])
                    pos1 = layout.exchange(data['Position'])
                    density1 = real.readout(pos1)
                    density = layout.gather(density1)

                    # normalize the position after reading out density!
                    data['Position'][:] /= self.datasource.BoxSize
                    data['Velocity'][:] /= self.datasource.BoxSize
                    data['Density'][:] = density
                    subsample.append(data)

        subsample = numpy.concatenate(subsample)
        mpsort.sort(subsample, orderby='ID', comm=self.comm)

        return subsample
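
`Smoothing` above multiplies the Fourier modes by one 1-D Gaussian factor per axis; because the per-axis wavenumber arrays broadcast against the grid, the axis-by-axis products combine into the full isotropic kernel exp(-0.5 * k**2 * R**2). A toy check of that identity on a small 2-D grid (the grid shape and R are arbitrary):

import numpy

R = 2.0
kx = 2 * numpy.pi * numpy.fft.fftfreq(8)[:, None]   # shape (8, 1)
ky = 2 * numpy.pi * numpy.fft.fftfreq(8)[None, :]   # shape (1, 8)

field = numpy.ones((8, 8), dtype=complex)
for ki in (kx, ky):                     # one broadcastable factor per axis...
    field *= numpy.exp(-0.5 * ki**2 * R**2)

# ...equals the full isotropic Gaussian applied once
assert numpy.allclose(field, numpy.exp(-0.5 * (kx**2 + ky**2) * R**2))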
Exemplo n.º 41
0
def main():
    comm = MPI.COMM_WORLD
    IC, SNAP, LABEL = None, None, None
    if comm.rank == 0:
        LABEL = files.Snapshot(ns.halolabel, files.HaloLabelFile)

    LABEL = comm.bcast(LABEL)
 
    Ntot = sum(LABEL.npart)

    [[ID]] = ns.datasource_tf.read(['ID'], comm, bunchsize=None)

    start = sum(comm.allgather(len(ID))[:comm.rank])
    end   = sum(comm.allgather(len(ID))[:comm.rank+1])
    data = numpy.empty(end - start, dtype=[
                ('Label', ('i4')), 
                ('ID', ('i8')), 
                ])
    data['ID'] = ID
    del ID
    data['Label'] = LABEL.read("Label", start, end)

    mpsort.sort(data, orderby='ID')

    label = data['Label'].copy()

    data = numpy.empty(end - start, dtype=[
                ('ID', ('i8')), 
                ('Position', ('f4', 3)), 
                ])
    [[data['Position'][...]]] = ns.datasource_ti.read(['Position'], comm, bunchsize=None)
    [[data['ID'][...]]] = ns.datasource_ti.read(['ID'], comm, bunchsize=None)
    mpsort.sort(data, orderby='ID')

    pos = data['Position'] / ns.datasource_ti.BoxSize
    del data
    
    N = halos.count(label)
    hpos = halos.centerofmass(label, pos, boxsize=1.0)
    
    if comm.rank == 0:
        logging.info("Total number of halos: %d" % len(N))
        logging.info("N %s" % str(N))
        LinkingLength = LABEL.get_file(0).linking_length

        with h5py.File(ns.output + '.hdf5', 'w') as ff:
            N[0] = 0
            data = numpy.empty(shape=(len(N),), 
                dtype=[
                ('Position', ('f4', 3)),
                ('Velocity', ('f4', 3)),
                ('Length', 'i4')])
            
            data['Position'] = hpos
            data['Velocity'] = 0
            data['Length'] = N
            
            # do not create dataset then fill because of
            # https://github.com/h5py/h5py/pull/606

            dataset = ff.create_dataset(
                name='TracedFOFGroups', data=data
                )
            dataset.attrs['Ntot'] = Ntot
            dataset.attrs['BoxSize'] = ns.datasource_ti.BoxSize
            dataset.attrs['ti'] = ns.datasource_ti.string
            dataset.attrs['tf'] = ns.datasource_tf.string

        logging.info("Written %s" % ns.output + '.hdf5')