Example #1
    def run(self):
        """
        Run the TraceHalo Algorithm
        """
        import numpy
        import mpsort
        from nbodykit import halos
        
        comm = self.comm

        with self.source.open() as source:
            [[ID]] = source.read(['ID'], full=True)

        Ntot = self.comm.allreduce(len(ID))

        with self.sourcelabel.open() as sourcelabel:
            [[label]] = sourcelabel.read(['Label'], full=True)

        mpsort.sort(label, orderby=ID, comm=self.comm)
        del ID

        data = numpy.empty(len(label), dtype=[
                    ('ID', ('i8')), 
                    ('Position', ('f4', 3)), 
                    ('Velocity', ('f4', 3)), 
                    ])
        with self.dest.open() as dest:
            [[data['Position'][...]]] = dest.read(['Position'], full=True)
            [[data['Velocity'][...]]] = dest.read(['Velocity'], full=True)
            [[data['ID'][...]]] = dest.read(['ID'], full=True)
        mpsort.sort(data, orderby='ID', comm=self.comm)

        data['Position'] /= self.dest.BoxSize
        data['Velocity'] /= self.dest.BoxSize
        
        N = halos.count(label)
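        # Position/Velocity were scaled by BoxSize above; boxsize=1.0 lets
        # centerofmass average positions with periodic wrapping in the unit box,
        # while boxsize=None averages velocities without any wrapping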
        hpos = halos.centerofmass(label, data['Position'], boxsize=1.0)
        hvel = halos.centerofmass(label, data['Velocity'], boxsize=None)
        return hpos, hvel, N, Ntot
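
A minimal sketch, assuming only that mpi4py and mpsort are installed (the array sizes and the 'Label' payload are illustrative), of the ID-alignment step used above: mpsort.sort globally orders a distributed structured array by a key field, so arrays from different snapshots sorted by the same particle ID line up row by row.

import numpy
from mpi4py import MPI
import mpsort

comm = MPI.COMM_WORLD

# each rank starts with an arbitrary, disjoint subset of particle IDs
ids = numpy.arange(comm.rank, 16, comm.size, dtype='i8')
data = numpy.empty(len(ids), dtype=[('ID', 'i8'), ('Label', 'i4')])
data['ID'] = ids
data['Label'] = comm.rank               # illustrative payload

# globally sort by the 'ID' field; afterwards every rank holds a contiguous,
# ID-ordered chunk, so two datasets sorted the same way correspond element-wise
mpsort.sort(data, orderby='ID', comm=comm)
print(comm.rank, data['ID'])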
Example #2
def main():
    comm = MPI.COMM_WORLD
    np = split_size_2d(comm.size)

    grid = [
        numpy.linspace(0, 1.0, np[0] + 1, endpoint=True),
        numpy.linspace(0, 1.0, np[1] + 1, endpoint=True),
    ]
    domain = GridND(grid)
    if comm.rank == 0:
        logging.info('grid %s' % str(grid) )

    [P] = read(comm, ns.filename, TPMSnapshotFile, columns=['Position', 'ID'])

    tpos = P['Position']
    tid = P['ID']
    del P

    Ntot = sum(comm.allgather(len(tpos)))

    if comm.rank == 0:
        logging.info('Total number of particles %d, ll %g' % (Ntot, ns.LinkingLength))
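    # LinkingLength is given in units of the mean inter-particle separation,
    # Ntot ** (-1/3) in the unit box, so ll below is the linking length in box units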
    ll = ns.LinkingLength * Ntot ** -0.3333333

    #print pos
    #print ((pos[0] - pos[1]) ** 2).sum()** 0.5, ll
  
    layout = domain.decompose(tpos, smoothing=ll * 1)

    tpos = layout.exchange(tpos)
    tid = layout.exchange(tid)

    logging.info('domain %d has %d particles' % (comm.rank, len(tid)))

    data = cluster.dataset(tpos, boxsize=1.0)
    fof = cluster.fof(data, linking_length=ll, np=0, verbose=True)
    
    # initialize global labels
    minid = equiv_class(fof.labels, tid, op=numpy.fmin)[fof.labels]
    del fof
    del data
    del tpos
    del tid

    while True:
        # merge, if a particle belongs to several ranks
        # use the global label of the minimal
        minid_new = layout.gather(minid, mode=numpy.fmin)
        minid_new = layout.exchange(minid_new)

        # on my rank, these particles have been merged
        merged = minid_new != minid
        # if no rank has merged any, we are done
        # gl is the global label (albeit with some holes)
        total = comm.allreduce(merged.sum())
            
        if comm.rank == 0:
            logging.info('merged %d halos' % total)

        if total == 0:
            del minid_new
            break
        old = minid[merged]
        new = minid_new[merged]
        arg = old.argsort()
        new = new[arg]
        old = old[arg]
        replacesorted(minid, old, new, out=minid)

    minid = layout.gather(minid, mode=numpy.fmin)

    label = assign_halo_label(minid, comm, thresh=ns.nmin) 

    N = halos.count(label, comm=comm)

    if comm.rank == 0:
        logging.info('total halos is %d' % len(N))

    [P] = read(comm, ns.filename, TPMSnapshotFile, columns=['Position'])

    hpos = halos.centerofmass(label, P['Position'], boxsize=1.0, comm=comm)

    [P] = read(comm, ns.filename, TPMSnapshotFile, columns=['Velocity'])

    hvel = halos.centerofmass(label, P['Velocity'], boxsize=None, comm=comm)

    if comm.rank == 0:
        logging.info('N %s' % str(N))
        logging.info('total groups %s' % str(N.shape))
        logging.info('total particles %d' % N.sum())
        logging.info('above %d: %d' % (ns.nmin, (N > ns.nmin).sum()))
        N[0] = -1
        # write the halo catalogue as raw binary: int32 count, float32 linking
        # length, then the group sizes, positions and velocities
        with open(ns.output + '.halo', 'wb') as ff:
            numpy.int32(len(N)).tofile(ff)
            numpy.float32(ns.LinkingLength).tofile(ff)
            numpy.int32(N).tofile(ff)
            numpy.float32(hpos).tofile(ff)
            numpy.float32(hvel).tofile(ff)
        logging.info('hpos %s' % str(hpos))
    del N
    del hpos

    npart = None
    if comm.rank == 0:
        snapshot = Snapshot(ns.filename, TPMSnapshotFile)
        npart = snapshot.npart
        # pre-create one .grp file per snapshot file and write its header
        # (particle count, linking length); the labels are filled in below
        for i in range(len(snapshot.npart)):
            with open(ns.output + '.grp.%02d' % i, 'wb') as ff:
                numpy.int32(npart[i]).tofile(ff)
                numpy.float32(ns.LinkingLength).tofile(ff)
    npart = comm.bcast(npart)

    start = sum(comm.allgather(len(label))[:comm.rank])
    end = sum(comm.allgather(len(label))[:comm.rank+1])
    label = numpy.int32(label)
    written = 0
    for i in range(len(npart)):
        filestart = sum(npart[:i])
        fileend = sum(npart[:i+1])
        mystart = start - filestart
        myend = end - filestart
        if myend <= 0 : continue
        if mystart >= npart[i] : continue
        if myend > npart[i]: myend = npart[i]
        if mystart < 0: mystart = 0
        with open(ns.output + '.grp.%02d' % i, 'rb+') as ff:
            ff.seek(8, 0)
            ff.seek(mystart * 4, 1)
            label[written:written + myend - mystart].tofile(ff)
        written += myend - mystart

    return
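
A small serial sketch of the range-overlap arithmetic used in the final loop above: it maps this rank's contiguous global slice of labels, [start, end), onto each file's local particle range. The helper name and the example numbers are illustrative.

def file_slices(start, end, npart):
    """Yield (file index, local start, local end) for the overlap of the
    global range [start, end) with each file's particle range."""
    filestart = 0
    for i, n in enumerate(npart):
        mystart = max(start - filestart, 0)
        myend = min(end - filestart, n)
        if mystart < myend:
            yield i, mystart, myend
        filestart += n

# a rank holding global particles [5, 12) against three files of 4 particles each
print(list(file_slices(5, 12, [4, 4, 4])))   # [(1, 1, 4), (2, 0, 4)]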
Example #3
def fof_catalogue(datasource, label, comm, calculate_initial=False):
    """ Catalogue of FOF groups based on label from a data source

        The friends-of-friends (FOF) algorithm was first used by Davis et al. (1985)
        to define halos in hierarchical structure formation of cosmological
        simulations. The algorithm is also known as DBSCAN in computer science.
        The subroutine here implements a parallel version of FOF.

        The underlying local FOF algorithm is from `kdcount.cluster`, 
        which is an adaptation of the implementation in Volker Springel's 
        Gadget and Martin White's PM. It could have been done faster.

        Parameters
        ----------
        label : array_like
            Halo label of each particle from the data source.

        datasource : DataSource
            The data source; must support the Position and Velocity columns.
            datasource.BoxSize is used as well.

        comm : MPI.Comm
            The MPI communicator; it must agree with the datasource.

        Returns
        -------
        catalogue : array_like
            A 1-d structured array with fields 'Position', 'Velocity' and 'Length'.
            'Position' and 'Velocity' hold the center-of-mass position and velocity
            of each FOF halo, and 'Length' is the number of particles in the halo.
            The catalogue is sorted such that the most massive halo comes first;
            catalogue[0] does not correspond to any halo.
 
    """
    dtype=[
        ('Position', ('f4', 3)),
        ('Velocity', ('f4', 3)),
        ('Length', 'i4')]

    N = halos.count(label, comm=comm)
    
    # explicitly open the DataSource
    with datasource.keep_cache():
    
        with datasource.open() as stream:
            [[Position]] = stream.read(['Position'], full=True)
        Position /= datasource.BoxSize
        hpos = halos.centerofmass(label, Position, boxsize=1.0, comm=comm)
        del Position

        with datasource.open() as stream: 
            [[Velocity]] = stream.read(['Velocity'], full=True)
        Velocity /= datasource.BoxSize

        hvel = halos.centerofmass(label, Velocity, boxsize=None, comm=comm)
        del Velocity

        if calculate_initial:

            dtype.append(('InitialPosition', ('f4', 3)))
        
            with datasource.open() as stream:
                [[Position]] = stream.read(['InitialPosition'], full=True)
            Position /= datasource.BoxSize
            hpos_init = halos.centerofmass(label, Position, boxsize=1.0, comm=comm)
            del Position

    if comm.rank == 0: logger.info("Calculated catalogue %d halos found. " % (len(N) -1 ))
    if comm.rank == 0: logger.info("Length = %s " % N[1:])
    if comm.rank == 0: logger.info("%d particles not in halo" % N[0])

    if comm.rank == 0:
        catalogue = numpy.empty(shape=len(N), dtype=dtype)

        catalogue['Position'] = hpos
        catalogue['Velocity'] = hvel
        catalogue['Length'] = N
        catalogue['Length'][0] = 0
        if calculate_initial:
            catalogue['InitialPosition'] = hpos_init
    else:
        catalogue = numpy.empty(shape=0, dtype=dtype)
        
    return catalogue
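
A short sketch of how the returned structured array might be inspected on the root rank; the field names and the convention that catalogue[0] is not a halo follow the docstring above, and the helper name is illustrative.

def summarize(catalogue):
    # non-root ranks receive a zero-length array
    if len(catalogue) == 0:
        return
    groups = catalogue[1:]               # entry 0 does not correspond to a halo
    print("halos        :", len(groups))
    print("particles    :", groups['Length'].sum())
    print("most massive :", groups['Length'][0], "particles at", groups['Position'][0])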
Example #4
def main():
    comm = MPI.COMM_WORLD
    IC, SNAP, LABEL = None, None, None
    if comm.rank == 0:
        LABEL = files.Snapshot(ns.halolabel, files.HaloLabelFile)

    LABEL = comm.bcast(LABEL)
 
    Ntot = sum(LABEL.npart)

    [[ID]] = ns.datasource_tf.read(['ID'], comm, bunchsize=None)

    start = sum(comm.allgather(len(ID))[:comm.rank])
    end   = sum(comm.allgather(len(ID))[:comm.rank+1])
    data = numpy.empty(end - start, dtype=[
                ('Label', ('i4')), 
                ('ID', ('i8')), 
                ])
    data['ID'] = ID
    del ID
    data['Label'] = LABEL.read("Label", start, end)

    # globally sort by particle ID; together with the matching sort below this
    # aligns 'Label' and 'Position' row by row on every rank
    mpsort.sort(data, orderby='ID')

    label = data['Label'].copy()

    data = numpy.empty(end - start, dtype=[
                ('ID', ('i8')), 
                ('Position', ('f4', 3)), 
                ])
    [[data['Position'][...]]] = ns.datasource_ti.read(['Position'], comm, bunchsize=None)
    [[data['ID'][...]]] = ns.datasource_ti.read(['ID'], comm, bunchsize=None)
    mpsort.sort(data, orderby='ID')

    pos = data['Position'] / ns.datasource_ti.BoxSize
    del data
    
    N = halos.count(label)
    hpos = halos.centerofmass(label, pos, boxsize=1.0)
    
    if comm.rank == 0:
        logging.info("Total number of halos: %d" % len(N))
        logging.info("N %s" % str(N))
        LinkingLength = LABEL.get_file(0).linking_length

        with h5py.File(ns.output + '.hdf5', 'w') as ff:
            N[0] = 0
            data = numpy.empty(shape=(len(N),), 
                dtype=[
                ('Position', ('f4', 3)),
                ('Velocity', ('f4', 3)),
                ('Length', 'i4')])
            
            data['Position'] = hpos
            data['Velocity'] = 0
            data['Length'] = N
            
            # do not create dataset then fill because of
            # https://github.com/h5py/h5py/pull/606

            dataset = ff.create_dataset(
                name='TracedFOFGroups', data=data
                )
            dataset.attrs['Ntot'] = Ntot
            dataset.attrs['BoxSize'] = ns.datasource_ti.BoxSize
            dataset.attrs['ti'] = ns.datasource_ti.string
            dataset.attrs['tf'] = ns.datasource_tf.string

        logging.info("Written %s" % ns.output + '.hdf5')
Example #5
def main():
    comm = MPI.COMM_WORLD
    np = split_size_2d(comm.size)

    grid = [numpy.linspace(0, 1.0, np[0] + 1, endpoint=True), numpy.linspace(0, 1.0, np[1] + 1, endpoint=True)]
    domain = GridND(grid)
    if comm.rank == 0:
        logging.info("grid %s" % str(grid))

    # read in all !
    [[Position]] = ns.datasource.read(["Position"], comm, bunchsize=None)
    Position /= ns.datasource.BoxSize
    logging.info("Position shape %s, max %s" % (str(Position.shape), str(Position.max(axis=0))))
    Ntot = sum(comm.allgather(len(Position)))

    if comm.rank == 0:
        logging.info("Total number of particles %d, ll %g" % (Ntot, ns.LinkingLength))
    ll = ns.LinkingLength * Ntot ** -0.3333333

    # print pos
    # print ((pos[0] - pos[1]) ** 2).sum()** 0.5, ll

    layout = domain.decompose(Position, smoothing=ll * 1)

    Position = layout.exchange(Position)

    logging.info("domain %d has %d particles" % (comm.rank, len(Position)))

    labels = local_fof(Position, ll)
    del Position

    if comm.rank == 0:
        logging.info("local fof done")

    [[ID]] = ns.datasource.read(["ID"], comm, bunchsize=None)
    ID = layout.exchange(ID)
    # initialize global labels
    minid = equiv_class(labels, ID, op=numpy.fmin)[labels]
    del ID

    if comm.rank == 0:
        logging.info("equiv class, done")

    while True:
        # merge, if a particle belongs to several ranks
        # use the global label of the minimal
        minid_new = layout.gather(minid, mode=numpy.fmin)
        minid_new = layout.exchange(minid_new)

        # on my rank, these particles have been merged
        merged = minid_new != minid
        # if no rank has merged any, we are done
        # gl is the global label (albeit with some holes)
        total = comm.allreduce(merged.sum())

        if comm.rank == 0:
            logging.info("merged %d halos", total)

        if total == 0:
            del minid_new
            break
        old = minid[merged]
        new = minid_new[merged]
        arg = old.argsort()
        new = new[arg]
        old = old[arg]
        replacesorted(minid, old, new, out=minid)

    minid = layout.gather(minid, mode=numpy.fmin)
    del layout

    if comm.rank == 0:
        logging.info("merging, done")

    Nitem = len(minid)

    data = numpy.empty(Nitem, dtype=[("origind", "u8"), ("fofid", "u8")])
    # assign origind for recovery of ordering, since
    # we need to work in sorted fofid
    data["fofid"] = minid
    del minid

    label = assign_halo_label(data, comm, thresh=ns.nmin)
    label = label.copy()
    del data
    N = halos.count(label, comm=comm)

    if comm.rank == 0:
        logging.info("Length of entries %s ", str(N))
        logging.info("Length of entries %s ", N.shape[0])
        logging.info("Total particles %s ", N.sum())

    [[Position]] = ns.datasource.read(["Position"], comm, bunchsize=None)

    Position /= ns.datasource.BoxSize
    hpos = halos.centerofmass(label, Position, boxsize=1.0, comm=comm)
    del Position

    [[Velocity]] = ns.datasource.read(["Velocity"], comm, bunchsize=None)
    Velocity /= ns.datasource.BoxSize

    hvel = halos.centerofmass(label, Velocity, boxsize=None, comm=comm)
    del Velocity

    if comm.rank == 0:
        with h5py.File(ns.output + ".hdf5", "w") as ff:
            N[0] = 0
            data = numpy.empty(
                shape=(len(N),), dtype=[("Position", ("f4", 3)), ("Velocity", ("f4", 3)), ("Length", "i4")]
            )

            data["Position"] = hpos
            data["Velocity"] = hvel
            data["Length"] = N

            # do not create dataset then fill because of
            # https://github.com/h5py/h5py/pull/606

            dataset = ff.create_dataset(name="FOFGroups", data=data)
            dataset.attrs["Ntot"] = Ntot
            dataset.attrs["LinkLength"] = ns.LinkingLength
            dataset.attrs["BoxSize"] = ns.datasource.BoxSize

    del N
    del hpos

    Ntot = comm.allreduce(len(label))
    nfile = (Ntot + 512 ** 3 - 1) // (512 ** 3)

    npart = [(i + 1) * Ntot // nfile - i * Ntot // nfile for i in range(nfile)]

    if comm.rank == 0:
        for i in range(len(npart)):
            with open(ns.output + ".grp.%02d" % i, "w") as ff:
                numpy.int32(npart[i]).tofile(ff)
                numpy.float32(ns.LinkingLength).tofile(ff)
                pass

    start = sum(comm.allgather(len(label))[: comm.rank])
    end = sum(comm.allgather(len(label))[: comm.rank + 1])
    label = numpy.int32(label)
    written = 0
    for i in range(len(npart)):
        filestart = sum(npart[:i])
        fileend = sum(npart[: i + 1])
        mystart = start - filestart
        myend = end - filestart
        if myend <= 0:
            continue
        if mystart >= npart[i]:
            continue
        if myend > npart[i]:
            myend = npart[i]
        if mystart < 0:
            mystart = 0
        with open(ns.output + ".grp.%02d" % i, "r+") as ff:
            ff.seek(8, 0)
            ff.seek(mystart * 4, 1)
            label[written : written + myend - mystart].tofile(ff)
        written += myend - mystart

    return
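
A tiny standalone check of the file-partition rule used above: Ntot particles are split into nfile chunks of at most 512 ** 3 each, and the per-file counts telescope back to exactly Ntot. The example Ntot is arbitrary.

Ntot = 3 * 512 ** 3 + 7
nfile = (Ntot + 512 ** 3 - 1) // (512 ** 3)
npart = [(i + 1) * Ntot // nfile - i * Ntot // nfile for i in range(nfile)]
assert sum(npart) == Ntot
print(nfile, npart)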
Example #6
def fof_catalogue(datasource, label, comm, calculate_initial=False):
    """ Catalogue of FOF groups based on label from a data source

        The friends-of-friends (FOF) algorithm was first used by Davis et al. (1985)
        to define halos in hierarchical structure formation of cosmological
        simulations. The algorithm is also known as DBSCAN in computer science.
        The subroutine here implements a parallel version of FOF.

        The underlying local FOF algorithm is from `kdcount.cluster`, 
        which is an adaptation of the implementation in Volker Springel's 
        Gadget and Martin White's PM. It could have been done faster.

        Parameters
        ----------
        label : array_like
            Halo label of each particle from the data source.

        datasource : DataSource
            The data source; must support the Position and Velocity columns.
            datasource.BoxSize is used as well.

        comm : MPI.Comm
            The MPI communicator; it must agree with the datasource.

        Returns
        -------
        catalogue : array_like
            A 1-d structured array with fields 'Position', 'Velocity' and 'Length'.
            'Position' and 'Velocity' hold the center-of-mass position and velocity
            of each FOF halo, and 'Length' is the number of particles in the halo.
            The catalogue is sorted such that the most massive halo comes first;
            catalogue[0] does not correspond to any halo.
 
    """
    dtype = [('Position', ('f4', 3)), ('Velocity', ('f4', 3)),
             ('Length', 'i4')]

    N = halos.count(label, comm=comm)

    # explicitly open the DataSource
    with datasource.keep_cache():

        with datasource.open() as stream:
            [[Position]] = stream.read(['Position'], full=True)
        Position /= datasource.BoxSize
        hpos = halos.centerofmass(label, Position, boxsize=1.0, comm=comm)
        del Position

        with datasource.open() as stream:
            [[Velocity]] = stream.read(['Velocity'], full=True)
        Velocity /= datasource.BoxSize

        hvel = halos.centerofmass(label, Velocity, boxsize=None, comm=comm)
        del Velocity

        if calculate_initial:

            dtype.append(('InitialPosition', ('f4', 3)))

            with datasource.open() as stream:
                [[Position]] = stream.read(['InitialPosition'], full=True)
            Position /= datasource.BoxSize
            hpos_init = halos.centerofmass(label,
                                           Position,
                                           boxsize=1.0,
                                           comm=comm)
            del Position

    if comm.rank == 0:
        logger.info("Calculated catalogue: %d halos found." % (len(N) - 1))
        logger.info("Length = %s" % N[1:])
        logger.info("%d particles are not in any halo" % N[0])

    if comm.rank == 0:
        catalogue = numpy.empty(shape=len(N), dtype=dtype)

        catalogue['Position'] = hpos
        catalogue['Velocity'] = hvel
        catalogue['Length'] = N
        catalogue['Length'][0] = 0
        if calculate_initial:
            catalogue['InitialPosition'] = hpos_init
    else:
        catalogue = numpy.empty(shape=0, dtype=dtype)

    return catalogue
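
A follow-up sketch, assuming only mpi4py and numpy; the zero-filled array stands in for a real fof_catalogue(datasource, label, comm) call. Since the populated catalogue exists only on rank 0 and the other ranks get a zero-length array, a broadcast is one way to give every rank a copy.

import numpy
from mpi4py import MPI

comm = MPI.COMM_WORLD
dtype = [('Position', ('f4', 3)), ('Velocity', ('f4', 3)), ('Length', 'i4')]

# stand-in for: catalogue = fof_catalogue(datasource, label, comm)
catalogue = numpy.zeros(4 if comm.rank == 0 else 0, dtype=dtype)

# pickle-based broadcast; every rank now holds the same structured array
catalogue = comm.bcast(catalogue)
print(comm.rank, len(catalogue))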