Python BigFileMPI示例，bigfile.BigFileMPI Python示例

示例#1

0

显示文件

文件： FOF.py 项目： mjvakili/nbodykit

    def save(self, output, data):
        """
        Write the FOF group catalog and, unless disabled, the labels.

        Parameters
        ----------
        output : str
            name of the HDF5 file that receives the ``FOFGroups`` dataset;
            the labels go to a bigfile whose name is ``output`` with
            ``.hdf5`` replaced by ``.labels``
        data : tuple
            ``(catalog, Ntot)`` when ``self.without_labels`` is set,
            ``(catalog, labels, Ntot)`` otherwise
        """
        write_labels = not self.without_labels
        if write_labels:
            catalog, labels, Ntot = data
        else:
            catalog, Ntot = data

        # the HDF5 catalog is written by rank 0 only
        if self.comm.rank == 0:
            with h5py.File(output, 'w') as hdf:
                # hand the data over at creation time instead of creating
                # the dataset and filling it afterwards, because of
                # https://github.com/h5py/h5py/pull/606
                groups = hdf.create_dataset(name='FOFGroups', data=catalog)
                groups.attrs['Ntot'] = Ntot
                groups.attrs['LinkLength'] = self.linklength
                groups.attrs['BoxSize'] = self.datasource.BoxSize

        if not write_labels:
            return

        labels_path = output.replace('.hdf5', '.labels')
        label_file = bigfile.BigFileMPI(self.comm, labels_path, create=True)
        # one physical file per 8 ranks, rounded up
        nfile = (self.comm.size + 7) // 8
        with label_file.create_from_array("Label", labels,
                                          Nfile=nfile) as column:
            column.attrs['LinkLength'] = self.linklength
            column.attrs['Ntot'] = Ntot
            column.attrs['BoxSize'] = self.datasource.BoxSize
        return

示例#2

0

显示文件

文件： FastPM.py 项目： mjvakili/nbodykit

    def __init__(self,
                 path,
                 BoxSize=None,
                 bunchsize=4 * 1024 * 1024,
                 rsd=None):
        """
        Store the reader options and load basic metadata from the file.

        Parameters
        ----------
        path : str
            location of the bigfile to open
        BoxSize : optional
            box size to use instead of the one stored in the file header;
            ``None`` (the default) keeps the header value
        bunchsize : int, optional
            stored as ``self.bunchsize``
        rsd : optional
            stored as ``self.rsd``

        Sets ``self.size`` (number of entries in the ``Position`` column)
        and ``self.M0`` (particle mass, logged in Msun/h).
        """
        self.path = path
        self.BoxSize = BoxSize
        self.bunchsize = bunchsize
        self.rsd = rsd

        f = bigfile.BigFileMPI(self.comm, self.path)
        # the header attributes live in the root block, or failing that in
        # a block named 'header'; only real errors trigger the fallback, so
        # KeyboardInterrupt / SystemExit still propagate
        try:
            header = f['.']
        except Exception:
            header = f['header']

        # the file stores a single value; expand it to a 3-vector
        file_boxsize = numpy.empty(3, dtype='f8')
        file_boxsize[:] = header.attrs['BoxSize'][0]
        OmegaM = header.attrs['OmegaM'][0]

        # open the Position column once and reuse its size
        self.size = f['Position'].size
        # NOTE(review): 27.75e10 is presumably the critical density in
        # (Msun/h) / (Mpc/h)^3 -- confirm
        self.M0 = 27.75e10 * OmegaM * file_boxsize[0]**3 / self.size

        if self.comm.rank == 0:
            self.logger.info("File has boxsize of %s Mpc/h" %
                             str(file_boxsize))
            self.logger.info("Mass of a particle is %g Msun/h" % self.M0)

        if self.BoxSize is None:
            self.BoxSize = file_boxsize
        else:
            if self.comm.rank == 0:
                self.logger.info("Overriding boxsize as %s" %
                                 str(self.BoxSize))

示例#3

0

显示文件

文件： BigFileGrid.py 项目： mjvakili/nbodykit

    def read(self, real):
        """
        Load the stored field from the bigfile and resample it onto ``real``.

        Each rank reads the contiguous slice of the dataset that follows the
        slices of all lower ranks, where a rank's slice length is the size
        of its local field.

        Parameters
        ----------
        real :
            destination field; its ``Nmesh``, ``BoxSize`` and ``pm``
            attributes are used
        """
        import bigfile

        on_root = self.comm.rank == 0
        if on_root:
            self.logger.info("Reading from Nmesh = %d to Nmesh = %d" %
                             (self.Nmesh, real.Nmesh[0]))

        # reuse the destination mesh when the resolutions agree, otherwise
        # build one matching the resolution stored on disk
        if not any(real.Nmesh != self.Nmesh):
            pmread = real.pm
        else:
            pmread = ParticleMesh(BoxSize=real.BoxSize,
                                  Nmesh=(self.Nmesh, self.Nmesh, self.Nmesh),
                                  dtype='f4',
                                  comm=self.comm)

        f = bigfile.BigFileMPI(self.comm, self.path)

        with f[self.dataset] as ds:
            if self.isfourier:
                if on_root:
                    self.logger.info("reading complex field")
                field = ComplexField(pmread)
                # the local pieces must add up to the full dataset
                assert self.comm.allreduce(field.size) == ds.size
            else:
                if on_root:
                    self.logger.info("reading real field")
                field = RealField(pmread)

            # offset of this rank = total size held by all lower ranks
            sizes = self.comm.allgather(field.size)
            start = sum(sizes[:self.comm.rank])
            stop = start + field.size
            field.unsort(ds[start:stop])
            field.resample(real)

示例#4

0

显示文件

    def save(self, output, result):
        """
        Save the result array and its metadata to a bigfile.

        Parameters
        ----------
        output : str
            the string specifying the file to save
        result : tuple
            two-element tuple ``(data, stats)``: ``data`` is the array
            written to the dataset named ``self.dataset`` and ``stats`` is
            a mapping that provides ``'Ntot'``
        """
        import bigfile
        import numpy

        data, stats = result

        if self.comm.rank == 0:
            self.logger.info('Output Nmesh : %d' % self.Nmesh)
            self.logger.info("writing to %s/%s in %d parts" % (output, self.dataset, self.Nfile))

        # context managers close the column and the file deterministically
        # instead of leaving that to garbage collection
        with bigfile.BigFileMPI(self.comm, output, create=True) as f:
            with f.create_from_array(self.dataset, data,
                                     Nfile=self.Nfile) as b:
                b.attrs['ndarray.shape'] = numpy.array(
                    [self.Nmesh, self.Nmesh, self.Nmesh], dtype='i8')
                b.attrs['BoxSize'] = numpy.array(self.datasource.BoxSize,
                                                 dtype='f8')
                b.attrs['Nmesh'] = self.Nmesh
                b.attrs['paintNmesh'] = self.paintNmesh
                b.attrs['paintbrush'] = self.painter.paintbrush
                b.attrs['Ntot'] = stats['Ntot']

示例#5

0

显示文件

文件： HaloLabel.py 项目： mjvakili/nbodykit

    def __init__(self, path, bunchsize=4 * 1024 * 1024):
        """
        Remember the file location and record how many labels it holds.

        Parameters
        ----------
        path : str
            location of the bigfile containing a ``Label`` column
        bunchsize : int, optional
            stored as ``self.bunchsize``
        """
        self.path = path
        self.bunchsize = bunchsize

        # the total number of entries comes from the 'Label' column
        label_file = bigfile.BigFileMPI(self.comm, self.path)
        label_column = label_file['Label']
        self.size = label_column.size

示例#6

0

显示文件

    def save(self, output, dataset='Field', mode='real'):
        """
        Save the mesh as a :class:`~nbodykit.source.mesh.bigfile.BigFileMesh`
        on disk, either in real or complex space.

        Entries of the painted field's ``attrs`` are stored on the dataset.
        A value the file rejects with ``ValueError`` is stored as a
        ``'json://'``-prefixed JSON string instead; if that fails as well,
        the attribute is dropped with a warning.

        Parameters
        ----------
        output : str
            name of the bigfile file
        dataset : str, optional
            name of the bigfile data set where the field is stored
        mode : str, optional
            real or complex; the form of the field to store
        """
        import bigfile
        import warnings
        import json
        from nbodykit.utils import JSONEncoder

        field = self.paint(mode=mode)

        with bigfile.BigFileMPI(self.pm.comm, output, create=True) as ff:
            # flatten the local part of the field into a 1-d buffer
            data = numpy.empty(shape=field.size, dtype=field.dtype)
            field.ravel(out=data)
            with ff.create_from_array(dataset, data) as bb:
                if isinstance(field, RealField):
                    bb.attrs['ndarray.shape'] = field.pm.Nmesh
                    bb.attrs['BoxSize'] = field.pm.BoxSize
                    bb.attrs['Nmesh'] = field.pm.Nmesh
                elif isinstance(field, ComplexField):
                    bb.attrs['ndarray.shape'] = (field.Nmesh, field.Nmesh,
                                                 field.Nmesh // 2 + 1)
                    bb.attrs['BoxSize'] = field.pm.BoxSize
                    bb.attrs['Nmesh'] = field.pm.Nmesh

                for key in field.attrs:
                    # do not override the above values -- they are vectors (from pm)
                    if key in bb.attrs:
                        continue
                    value = field.attrs[key]
                    try:
                        bb.attrs[key] = value
                    except ValueError:
                        try:
                            json_str = 'json://' + json.dumps(value,
                                                              cls=JSONEncoder)
                            bb.attrs[key] = json_str
                        except Exception:
                            # best effort: losing one attribute must not
                            # abort the save, but interrupts still propagate
                            warnings.warn(
                                "attribute %s of type %s is unsupported and lost while saving MeshSource"
                                % (key, type(value)))

示例#7

0

显示文件

文件： BigFileGrid.py 项目： mjvakili/nbodykit

    def __init__(self, path, dataset):
        """
        Read the mesh metadata stored with a bigfile dataset.

        Parameters
        ----------
        path : str
            location of the bigfile
        dataset : str
            name of the dataset inside the file

        Sets ``BoxSize``, ``Nmesh``, ``Ntot`` (0 when the attribute is not
        stored) and ``isfourier`` on the instance.
        """
        import bigfile

        self.path = path
        self.dataset = dataset

        f = bigfile.BigFileMPI(self.comm, self.path)

        with f[self.dataset] as block:
            self.BoxSize = block.attrs['BoxSize']
            self.Nmesh = int(block.attrs['Nmesh'][0])
            self.Ntot = block.attrs['Ntot'][0] if 'Ntot' in block.attrs else 0

            # a complex dtype marks a Fourier-space field, anything else a
            # real-space field
            self.isfourier = (block.dtype.kind == 'c')

示例#8

0

显示文件

文件： HaloLabel.py 项目： mjvakili/nbodykit

    def parallel_read(self, columns, full=False):
        """
        Yield the requested columns in bunches, one slice per rank.

        The rows of the file are split evenly across the ranks of
        ``self.comm``; each rank walks its own range in steps of
        ``self.bunchsize``. Iteration continues until every rank has
        reached the end of its range, so a rank that finishes early keeps
        yielding empty slices.

        Parameters
        ----------
        columns : sequence of str
            names of the columns to read
        full : bool, optional
            if True, return the whole range of this rank in a single bunch

        Yields
        ------
        list
            one entry per name in ``columns``, in the same order; ``None``
            for columns that are not present in the file
        """
        f = bigfile.BigFileMPI(self.comm, self.path)

        # columns not in the file are skipped here (None will be returned)
        readcolumns = set(col for col in columns if col in f)

        # the dataset view and this rank's range do not change between
        # bunches, so set them up once
        dataset = bigfile.BigData(f, readcolumns)
        Ntot = dataset.size
        start = self.comm.rank * Ntot // self.comm.size
        end = (self.comm.rank + 1) * Ntot // self.comm.size

        done = False
        i = 0
        # collective check: stop only once all ranks are done
        while not numpy.all(self.comm.allgather(done)):
            if full:
                bunchstart, bunchend = start, end
            else:
                # clip both edges of the bunch to this rank's range
                bunchstart = min(start + i * self.bunchsize, end)
                bunchend = min(start + (i + 1) * self.bunchsize, end)

            if bunchend == end:
                done = True

            P = {}
            for column in readcolumns:
                P[column] = dataset[column][bunchstart:bunchend]

            i = i + 1
            yield [P.get(column, None) for column in columns]

示例#9

0

显示文件

    def save(self, output, columns, datasets=None, header='Header'):
        """
        Save the CatalogSource to a :class:`bigfile.BigFile`.

        Only the selected columns are saved and :attr:`attrs` are saved in
        ``header``. The attrs of columns are stored in the datasets.

        Parameters
        ----------
        output : str
            the name of the file to write to
        columns : list of str
            the names of the columns to save in the file
        datasets : list of str, optional
            names for the data set where each column is stored; defaults to
            the name of the column. May be given either one per entry of
            ``columns`` or one per non-default column.
        header : str, optional
            the name of the data set holding the header information, where
            :attr:`attrs` is stored

        Raises
        ------
        ValueError
            if ``datasets`` cannot be matched one-to-one with the columns
            that are saved, or if an entry of :attr:`attrs` can be stored
            neither directly nor as a JSON string
        """
        import bigfile
        import json
        from nbodykit.utils import JSONEncoder

        # trim out any default columns; these do not need to be saved as
        # they are automatically available to every Catalog
        requested = list(columns)
        keep = [not self[col].is_default for col in requested]
        columns = [col for col, ok in zip(requested, keep) if ok]

        if datasets is None:
            datasets = columns
        elif len(datasets) == len(requested):
            # drop the dataset names paired with trimmed columns; the
            # decision is made on the *column*, because a dataset name
            # does not have to be a column of this catalog
            datasets = [name for name, ok in zip(datasets, keep) if ok]

        if len(datasets) != len(columns):
            raise ValueError(
                "`datasets` must have the same length as `columns`")

        with bigfile.BigFileMPI(comm=self.comm, filename=output,
                                create=True) as ff:
            # reuse the header block when it already exists
            try:
                bb = ff.open(header)
            except Exception:
                bb = ff.create(header)
            with bb:
                for key in self.attrs:
                    try:
                        bb.attrs[key] = self.attrs[key]
                    except ValueError:
                        # fall back to a JSON string for values the file
                        # cannot store directly
                        try:
                            json_str = 'json://' + json.dumps(self.attrs[key],
                                                              cls=JSONEncoder)
                            bb.attrs[key] = json_str
                        except Exception:
                            raise ValueError(
                                "cannot save '%s' key in attrs dictionary" %
                                key)

            for column, dataset in zip(columns, datasets):
                c = self[column]

                # save column attrs too
                with ff.create_from_array(dataset, c) as bb:
                    if hasattr(c, 'attrs'):
                        for key in c.attrs:
                            bb.attrs[key] = c.attrs[key]

示例#10

0

显示文件

文件： preion-make-zreion.py 项目： rainwoodman/patchyreion

def main():
    """
    Build a reionization-redshift field from a FastPM snapshot and save it.

    Steps, in order: read ``BoxSize`` and ``ScalingFactor`` from the root
    block of the input file, paint the ``Position`` column onto a mesh in
    chunks, convert the mesh to an overdensity, filter it in Fourier space
    with ``tophat`` and ``Bk``, shift the result by the snapshot redshift,
    and write the sorted field together with an ``XYZ_bins`` block to the
    output bigfile.
    """
    ns = ap.parse_args()
    comm = MPI.COMM_WORLD

    source = bigfile.BigFileMPI(comm, ns.fastpm)
    with source['.'] as header:
        BoxSize = header.attrs['BoxSize'][0]
        Redshift = 1 / header.attrs['ScalingFactor'][0] - 1

    # two cells per resolution element, rounded down to a multiple of 8
    Nmesh = int(BoxSize / ns.resolution * 2)
    Nmesh = 8 * (Nmesh // 8)

    if comm.rank == 0:
        logger.info("source = %s", ns.fastpm)
        logger.info("output = %s", ns.output)
        logger.info("BoxSize = %g", BoxSize)
        logger.info("Redshift = %g", Redshift)
        logger.info("Nmesh = %g", Nmesh)

    pm = ParticleMesh([Nmesh, Nmesh, Nmesh], BoxSize, comm=comm)

    density = RealField(pm)
    density[...] = 0

    # paint the particle positions chunk by chunk
    with source['Position'] as ds:
        logger.info(ds.size)
        for offset in range(0, ds.size, ns.chunksize):
            pos = ds[offset:offset + ns.chunksize]
            layout = pm.decompose(pos)
            density.paint(layout.exchange(pos), hold=True)

    mean = density.cmean()

    if comm.rank == 0:
        logger.info("mean particle per cell = %s", mean)

    # density -> overdensity
    density[...] /= mean
    density[...] -= 1

    modes = density.r2c()

    for k, _, slab in zip(modes.slabs.x, modes.slabs.i, modes.slabs):
        k2 = sum(kd**2 for kd in k)
        # tophat
        slab[...] *= tophat(ns.filtersize, k2**0.5)
        # zreion
        slab[...] *= Bk(k2**0.5)
        slab[...] *= (1 + Redshift)

    zreion = modes.c2r()
    zreion[...] += Redshift

    mean = zreion.cmean()
    if comm.rank == 0:
        logger.info("zreion.mean = %s", mean)

    flat = numpy.empty(zreion.size, zreion.dtype)
    zreion.sort(out=flat)
    if comm.rank == 0:
        logger.info("sorted for output")

    with bigfile.BigFileMPI(comm, ns.output, create=True) as out:
        with out.create_from_array(ns.dataset, flat) as bb:
            bb.attrs['BoxSize'] = BoxSize
            bb.attrs['Redshift'] = Redshift
            bb.attrs['TopHatFilterSize'] = ns.filtersize
            bb.attrs['Nmesh'] = Nmesh
        #
        # hack: compatible with current MPGadget. This is not really needed
        # we'll remove the bins later, since BoxSize and Nmesh are known.
        with out.create("XYZ_bins", dtype='f8', size=Nmesh) as bb:
            if comm.rank == 0:
                bins = numpy.linspace(0, BoxSize * 1000., Nmesh, dtype='f8')
                bb.write(0, bins)

    if comm.rank == 0:
        logger.info("done. written at %s", ns.output)

示例#11

0

显示文件

文件： make_HI_reionization_table.py 项目： sbird/SimulationRunner

def generate_zreion_file(paramfile, output, redshift, resolution):
    """Do the work and output the file.
    This reads parameters from the MP-GenIC paramfile, generates a table of patchy reionization redshifts, and saves it.
    This table can be read by MP-Gadget. The core of the method is a correlation between large-scale overdensity and
    redshift of reionization calibrated from a radiative transfer simulation. To realise this, the code needs to know the large
    scale overdensity, which it does using FastPM.

    Arguments:
    - paramfile: genic parameter file to read from
    - output: file to save the reionization table to; must not exist yet
    - redshift: redshift for the midpoint of reionization
    - resolution: size of one mesh cell, in the same length unit as the box size after conversion to Mpc.
      The mesh has int(BoxSize / resolution) cells per side, rounded down to a multiple of 8.
      The same value is used as the top-hat filter size.

    Raises:
    - IOError: if output already exists. The check runs after the field has been computed.
    """
    config = configobj.ConfigObj(infile=paramfile,
                                 configspec=GenICconfigspec,
                                 file_error=True)
    #Input sanitisation
    vtor = validate.Validator()
    config.validate(vtor)
    comm = MPI.COMM_WORLD

    logger = logging
    cm_per_mpc = 3.085678e24
    # convert the box size from internal length units to Mpc
    BoxSize = config["BoxSize"] * config["UnitLength_in_cm"] / cm_per_mpc
    Redshift = redshift

    Nmesh = int(BoxSize / resolution)
    # round it to 8.
    Nmesh -= Nmesh % 8

    # Top-hat filter the density field on one grid scale
    filtersize = resolution

    if comm.rank == 0:
        logger.info("output = %s", output)
        logger.info("BoxSize = %g", BoxSize)
        logger.info("Redshift = %g", Redshift)
        logger.info("Nmesh = %g", Nmesh)

    pm = ParticleMesh([Nmesh, Nmesh, Nmesh], BoxSize, comm=comm)

    mnu = numpy.array([config["MNue"], config["MNum"], config["MNut"]])
    # CDM density = total matter minus baryons minus the neutrino
    # contribution sum(m_nu) / 93.14 / h^2
    omegacdm = config["Omega0"] - config["OmegaBaryon"] - numpy.sum(
        mnu) / 93.14 / config["HubbleParam"]**2
    cosmo = Cosmology(h=config["HubbleParam"],
                      Omega0_cdm=omegacdm,
                      T0_cmb=config["CMBTemperature"])
    state = get_lpt(pm, Redshift, cosmo, config["Seed"])
    real = state.to_mesh()

    logger.info("field painted")

    mean = real.cmean()

    if comm.rank == 0:
        logger.info("mean 2lpt density = %s", mean)

    # density -> overdensity
    real[...] /= mean
    real[...] -= 1

    cmplx = real.r2c()
    logger.info("field transformed")

    for k, _, slab in zip(cmplx.slabs.x, cmplx.slabs.i, cmplx.slabs):
        k2 = sum(kd**2 for kd in k)
        # tophat
        f = tophat(filtersize, k2**0.5)
        slab[...] *= f
        # zreion
        slab[...] *= Bofk(k2**0.5)
        slab[...] *= (1 + Redshift)

    real = cmplx.c2r()
    real[...] += Redshift
    logger.info("filters applied %d", real.size)

    mean = real.cmean()
    if comm.rank == 0:
        logger.info("zreion.mean = %s", mean)

    buffer = numpy.empty(real.size, real.dtype)
    real.ravel(out=buffer)
    if comm.rank == 0:
        logger.info("sorted for output")
    if os.path.exists(output):
        # one formatted message: passing the path as a second positional
        # argument would make IOError treat the text as an errno value
        raise IOError("Refusing to write to existing file: %s" % output)

    with bigfile.BigFileMPI(comm, output, create=True) as ff:
        with ff.create_from_array("Zreion_Table", buffer) as bb:
            bb.attrs['BoxSize'] = BoxSize
            bb.attrs['Redshift'] = Redshift
            bb.attrs['TopHatFilterSize'] = filtersize
            bb.attrs['Nmesh'] = Nmesh
        #
        # hack: compatible with current MPGadget. This is not really needed
        # we'll remove the bins later, since BoxSize and Nmesh are known.
        with ff.create("XYZ_bins", dtype='f8', size=Nmesh) as bb:
            if comm.rank == 0:
                bins = numpy.linspace(0, BoxSize, Nmesh, dtype='f8')
                bb.write(0, bins)

    if comm.rank == 0:
        logger.info("done. written at %s", output)

示例#12

0

显示文件

文件： FastPM.py 项目： mjvakili/nbodykit

    def parallel_read(self, columns, full=False):
        """
        Yield the requested columns in bunches, one slice per rank.

        The rows of the file are split evenly across the ranks of
        ``self.comm``; each rank walks its own range in steps of
        ``self.bunchsize`` until every rank has reached the end of its
        range.

        Besides columns stored in the file, two derived columns are
        supported: ``Mass`` (constant ``self.M0`` per row) and
        ``InitialPosition`` (rebuilt from the ``ID`` column). When
        ``self.rsd`` is set, ``Velocity`` is read as well and the
        ``Position`` column is displaced along that axis and wrapped into
        the box.

        Parameters
        ----------
        columns : sequence of str
            names of the columns to return
        full : bool, optional
            if True, return the whole range of this rank in a single bunch

        Yields
        ------
        list
            one entry per name in ``columns``, in the same order; ``None``
            for columns that could not be provided

        Raises
        ------
        ValueError
            if the box size in the file header differs from
            ``self.BoxSize[0]``
        """
        f = bigfile.BigFileMPI(self.comm, self.path)
        # header attributes live in the root block, or failing that in a
        # block named 'header'; interrupts are not swallowed
        try:
            header = f['.']
        except Exception:
            header = f['header']
        boxsize = header.attrs['BoxSize'][0]
        RSD = header.attrs['RSDFactor'][0]
        if boxsize != self.BoxSize[0]:
            raise ValueError("Box size mismatch, expecting %g" % boxsize)

        readcolumns = set(columns)
        if self.rsd is not None:
            # the velocity is needed for the displacement
            readcolumns.add('Velocity')

        want_initial = 'InitialPosition' in columns
        if want_initial:
            # derived from the particle ID, not stored in the file
            readcolumns.add('ID')
            readcolumns.discard('InitialPosition')

        # Mass is synthesized from self.M0 rather than read
        want_mass = 'Mass' in columns
        readcolumns.discard('Mass')

        # remove columns not in the file (None will be returned)
        readcolumns = set(col for col in readcolumns if col in f)

        if self.rsd is not None:
            axis = "xyz".index(self.rsd)

        if want_initial:
            # number of particles per side: search around the rounded cube
            # root for the exact integer root of self.size.
            # NOTE(review): if no candidate matches, the last one tried
            # (guess + 9) is used, as before -- confirm this is intended
            guess = int(self.size**(1. / 3) + 0.5)
            for nc in range(guess - 10, guess + 10):
                if nc**3 == self.size:
                    break
            cellsize = self.BoxSize[0] / nc

        # the dataset view and this rank's range do not change between
        # bunches, so set them up once
        dataset = bigfile.BigData(f, readcolumns)
        Ntot = dataset.size
        start = self.comm.rank * Ntot // self.comm.size
        end = (self.comm.rank + 1) * Ntot // self.comm.size

        done = False
        i = 0
        # collective check: stop only once all ranks are done
        while not numpy.all(self.comm.allgather(done)):
            if full:
                bunchstart, bunchend = start, end
            else:
                # clip both edges of the bunch to this rank's range
                bunchstart = min(start + i * self.bunchsize, end)
                bunchend = min(start + (i + 1) * self.bunchsize, end)

            if bunchend == end:
                done = True

            P = {}
            for column in readcolumns:
                P[column] = dataset[column][bunchstart:bunchend]

            if 'Velocity' in P:
                P['Velocity'] *= RSD

            if want_mass:
                P['Mass'] = numpy.ones(bunchend - bunchstart,
                                       dtype='u1') * self.M0

            if self.rsd is not None:
                P['Position'][:, axis] += P['Velocity'][:, axis]
                P['Position'][:, axis] %= self.BoxSize[axis]

            if want_initial:
                # decode the ID as a base-nc number, last axis fastest
                ids = P['ID'].copy()
                initial = numpy.empty((len(ids), 3), 'f4')
                for d in [2, 1, 0]:
                    initial[:, d] = ids % nc
                    ids[:] //= nc
                # move to the cell centre and scale to box units
                initial[:] += 0.5
                initial[:] *= cellsize
                P['InitialPosition'] = initial

            i = i + 1
            yield [P.get(column, None) for column in columns]