def save(self, output, data):
    """
    Write the group catalog to an HDF5 file and, unless
    ``self.without_labels`` is set, the labels to a bigfile next to it.

    Parameters
    ----------
    output : str
        name of the HDF5 file; the labels are written to the same name
        with ``.hdf5`` replaced by ``.labels``
    data : tuple
        ``(catalog, Ntot)`` when ``self.without_labels`` is true,
        otherwise ``(catalog, labels, Ntot)``
    """
    if self.without_labels:
        catalog, Ntot = data
    else:
        catalog, labels, Ntot = data

    # the HDF5 catalog is written by rank 0 only
    if self.comm.rank == 0:
        with h5py.File(output, 'w') as ff:
            # do not create dataset then fill because of
            # https://github.com/h5py/h5py/pull/606
            dataset = ff.create_dataset(name='FOFGroups', data=catalog)
            dataset.attrs['Ntot'] = Ntot
            dataset.attrs['LinkLength'] = self.linklength
            dataset.attrs['BoxSize'] = self.datasource.BoxSize

    if not self.without_labels:
        output = output.replace('.hdf5', '.labels')
        # this part is not guarded by a rank check: every rank opens the
        # bigfile; the context manager closes the file handle, which the
        # previous version left open
        with bigfile.BigFileMPI(self.comm, output, create=True) as bf:
            nfile = (self.comm.size + 7) // 8
            with bf.create_from_array("Label", labels, Nfile=nfile) as bb:
                bb.attrs['LinkLength'] = self.linklength
                bb.attrs['Ntot'] = Ntot
                bb.attrs['BoxSize'] = self.datasource.BoxSize
def __init__(self, path, BoxSize=None, bunchsize=4 * 1024 * 1024, rsd=None):
    """
    Open the bigfile at ``path`` and read the header quantities needed
    later: box size, particle count and particle mass.

    ``self.comm`` and ``self.logger`` are used but not set here; they are
    expected to already be available on the instance.

    Parameters
    ----------
    path : str
        location of the bigfile
    BoxSize : optional
        if given, overrides the box size stored in the file header
    bunchsize : int, optional
        stored on the instance as ``self.bunchsize``
    rsd : optional
        stored on the instance as ``self.rsd``
    """
    self.path = path
    self.BoxSize = BoxSize
    self.bunchsize = bunchsize
    self.rsd = rsd

    # box size as recorded in the file; the scalar header value is
    # broadcast to all three axes
    file_boxsize = numpy.empty(3, dtype='f8')

    with bigfile.BigFileMPI(self.comm, self.path) as f:
        # the header lives either in the root block or in 'header'
        try:
            header = f['.']
        except Exception:
            header = f['header']

        file_boxsize[:] = header.attrs['BoxSize'][0]
        OmegaM = header.attrs['OmegaM'][0]
        # open the Position block once and reuse its size
        self.size = f['Position'].size

    # NOTE(review): 27.75e10 is presumably the critical density in
    # (Msun/h) / (Mpc/h)^3, consistent with the log messages below -- confirm
    self.M0 = 27.75e10 * OmegaM * file_boxsize[0]**3 / self.size

    if self.comm.rank == 0:
        self.logger.info("File has boxsize of %s Mpc/h" % str(file_boxsize))
        self.logger.info("Mass of a particle is %g Msun/h" % self.M0)

    if self.BoxSize is None:
        self.BoxSize = file_boxsize
    else:
        if self.comm.rank == 0:
            self.logger.info("Overriding boxsize as %s" % str(self.BoxSize))
def read(self, real):
    """
    Read the stored field from ``self.path`` / ``self.dataset`` and
    resample it onto ``real``.

    Parameters
    ----------
    real : RealField
        destination field; if its ``Nmesh`` differs from ``self.Nmesh``
        the data is first loaded on a temporary mesh with the stored
        resolution and then resampled
    """
    import bigfile

    if self.comm.rank == 0:
        self.logger.info("Reading from Nmesh = %d to Nmesh = %d" %
                         (self.Nmesh, real.Nmesh[0]))

    # build a mesh matching the resolution on disk when it differs from
    # the resolution of the destination field
    if any(real.Nmesh != self.Nmesh):
        pmread = ParticleMesh(BoxSize=real.BoxSize,
                              Nmesh=(self.Nmesh, self.Nmesh, self.Nmesh),
                              dtype='f4', comm=self.comm)
    else:
        pmread = real.pm

    # both the file and the dataset are closed on exit; the file handle
    # was previously left open
    with bigfile.BigFileMPI(self.comm, self.path) as f:
        with f[self.dataset] as ds:
            if self.isfourier:
                if self.comm.rank == 0:
                    self.logger.info("reading complex field")
                field = ComplexField(pmread)
                assert self.comm.allreduce(field.size) == ds.size
            else:
                if self.comm.rank == 0:
                    self.logger.info("reading real field")
                field = RealField(pmread)

            # this rank's slice starts after the elements owned by all
            # lower ranks
            sizes = self.comm.allgather(field.size)
            start = sum(sizes[:self.comm.rank])
            end = start + field.size

            field.unsort(ds[start:end])
            field.resample(real)
def save(self, output, result):
    """
    Save the field array and its metadata to a bigfile dataset.

    Parameters
    ----------
    output : str
        the name of the bigfile to write to; the data goes into the
        ``self.dataset`` block, split into ``self.Nfile`` parts
    result : tuple
        ``(data, stats)`` -- the array written to the dataset and a
        mapping providing the ``'Ntot'`` entry stored in the attributes
        (presumably the value returned by `run()`)
    """
    import bigfile
    import numpy

    data, stats = result

    if self.comm.rank == 0:
        self.logger.info('Output Nmesh : %d' % self.Nmesh)
        self.logger.info("writing to %s/%s in %d parts" %
                         (output, self.dataset, self.Nfile))

    # close the block and the file explicitly so that the attributes do
    # not depend on garbage collection to reach the disk
    with bigfile.BigFileMPI(self.comm, output, create=True) as f:
        with f.create_from_array(self.dataset, data, Nfile=self.Nfile) as b:
            b.attrs['ndarray.shape'] = numpy.array(
                [self.Nmesh, self.Nmesh, self.Nmesh], dtype='i8')
            b.attrs['BoxSize'] = numpy.array(self.datasource.BoxSize,
                                             dtype='f8')
            b.attrs['Nmesh'] = self.Nmesh
            b.attrs['paintNmesh'] = self.paintNmesh
            b.attrs['paintbrush'] = self.painter.paintbrush
            b.attrs['Ntot'] = stats['Ntot']
def __init__(self, path, bunchsize=4 * 1024 * 1024):
    """
    Record the file location and the length of its ``Label`` block.

    Parameters
    ----------
    path : str
        location of the bigfile holding the ``Label`` block
    bunchsize : int, optional
        stored on the instance as ``self.bunchsize``
    """
    self.path = path
    self.bunchsize = bunchsize

    # only the size is needed here, so close the file right away
    with bigfile.BigFileMPI(self.comm, self.path) as f:
        self.size = f['Label'].size
def save(self, output, dataset='Field', mode='real'):
    """
    Save the mesh as a :class:`~nbodykit.source.mesh.bigfile.BigFileMesh`
    on disk, either in real or complex space.

    Parameters
    ----------
    output : str
        name of the bigfile file
    dataset : str, optional
        name of the bigfile data set where the field is stored
    mode : str, optional
        real or complex; the form of the field to store
    """
    import bigfile
    import warnings
    import json
    from nbodykit.utils import JSONEncoder

    field = self.paint(mode=mode)

    with bigfile.BigFileMPI(self.pm.comm, output, create=True) as ff:
        # flatten the local part of the field into a 1-d buffer
        data = numpy.empty(shape=field.size, dtype=field.dtype)
        field.ravel(out=data)

        with ff.create_from_array(dataset, data) as bb:
            if isinstance(field, RealField):
                bb.attrs['ndarray.shape'] = field.pm.Nmesh
                bb.attrs['BoxSize'] = field.pm.BoxSize
                bb.attrs['Nmesh'] = field.pm.Nmesh
            elif isinstance(field, ComplexField):
                # NOTE(review): this branch reads field.Nmesh while the
                # real branch reads field.pm.Nmesh -- confirm the stored
                # shape is the intended one
                bb.attrs['ndarray.shape'] = (field.Nmesh, field.Nmesh,
                                             field.Nmesh // 2 + 1)
                bb.attrs['BoxSize'] = field.pm.BoxSize
                bb.attrs['Nmesh'] = field.pm.Nmesh

            for key in field.attrs:
                # do not override the above values -- they are vectors (from pm)
                if key in bb.attrs:
                    continue
                value = field.attrs[key]
                try:
                    bb.attrs[key] = value
                except ValueError:
                    # fall back to a JSON string; saving stays best-effort,
                    # but KeyboardInterrupt / SystemExit are no longer
                    # swallowed as they were by the bare ``except``
                    try:
                        json_str = 'json://' + json.dumps(value,
                                                          cls=JSONEncoder)
                        bb.attrs[key] = json_str
                    except Exception:
                        warnings.warn(
                            "attribute %s of type %s is unsupported and lost while saving MeshSource"
                            % (key, type(value)))
def __init__(self, path, dataset):
    """
    Read the metadata of a field stored in a bigfile dataset.

    Sets ``BoxSize``, ``Nmesh``, ``Ntot`` (0 when the attribute is
    absent) and ``isfourier`` on the instance.

    Parameters
    ----------
    path : str
        location of the bigfile
    dataset : str
        name of the block inside the file that holds the field
    """
    self.path = path
    self.dataset = dataset

    import bigfile

    # close the file as well as the block once the attributes are read
    with bigfile.BigFileMPI(self.comm, self.path) as f:
        with f[self.dataset] as d:
            self.BoxSize = d.attrs['BoxSize']
            self.Nmesh = int(d.attrs['Nmesh'][0])
            self.Ntot = d.attrs['Ntot'][0] if 'Ntot' in d.attrs else 0

            # Is this a complex field or a real field?
            self.isfourier = d.dtype.kind == 'c'
def parallel_read(self, columns, full=False):
    """
    Generator yielding this rank's share of the requested columns.

    Each rank reads the range ``[rank * Ntot // size, (rank + 1) * Ntot //
    size)`` of the data, in bunches of ``self.bunchsize`` rows or in one
    piece when ``full`` is true. The loop continues until every rank
    reports that it is done, so a rank that finishes early keeps yielding
    empty slices.

    Parameters
    ----------
    columns : list of str
        names of the columns to read
    full : bool, optional
        if true, read the whole local range in a single bunch

    Yields
    ------
    list
        one entry per name in ``columns``, in the same order; ``None`` for
        columns that are not present in the file
    """
    with bigfile.BigFileMPI(self.comm, self.path) as f:
        # columns not in the file are skipped (None will be returned)
        readcolumns = set(col for col in columns if col in f)

        # the dataset and this rank's range do not change between bunches,
        # so set them up once
        dataset = bigfile.BigData(f, readcolumns)
        Ntot = dataset.size
        start = self.comm.rank * Ntot // self.comm.size
        end = (self.comm.rank + 1) * Ntot // self.comm.size

        done = False
        i = 0
        while not numpy.all(self.comm.allgather(done)):
            if full:
                bunchstart, bunchend = start, end
            else:
                # clip both edges to the end of the local range
                bunchstart = min(start + i * self.bunchsize, end)
                bunchend = min(start + (i + 1) * self.bunchsize, end)

            if bunchend == end:
                done = True

            P = {}
            for column in readcolumns:
                P[column] = dataset[column][bunchstart:bunchend]

            i = i + 1
            yield [P.get(column, None) for column in columns]
def save(self, output, columns, datasets=None, header='Header'):
    """
    Save the CatalogSource to a :class:`bigfile.BigFile`.

    Only the selected columns are saved and :attr:`attrs` are saved in
    ``header``. The attrs of columns are stored in the datasets.

    Parameters
    ----------
    output : str
        the name of the file to write to
    columns : list of str
        the names of the columns to save in the file
    datasets : list of str, optional
        names for the data set where each column is stored; defaults to
        the name of the column
    header : str, optional
        the name of the data set holding the header information, where
        :attr:`attrs` is stored

    Raises
    ------
    ValueError
        if ``datasets`` and ``columns`` differ in length after default
        columns are removed, or if an entry of :attr:`attrs` can be stored
        neither directly nor as a JSON string
    """
    import bigfile
    import json
    from nbodykit.utils import JSONEncoder

    # trim out any default columns; these do not need to be saved as
    # they are automatically available to every Catalog
    columns = [col for col in columns if not self[col].is_default]

    # also make sure no default columns in datasets
    if datasets is None:
        datasets = columns
    else:
        # NOTE(review): dataset names are looked up as column names here,
        # so a custom dataset name that is not also a column would fail
        # in self[...] -- confirm this is intended
        datasets = [col for col in datasets if not self[col].is_default]

    if len(datasets) != len(columns):
        raise ValueError(
            "`datasets` must have the same length as `columns`")

    with bigfile.BigFileMPI(comm=self.comm, filename=output,
                            create=True) as ff:
        # reuse the header block if it can be opened, else create it;
        # ``except Exception`` keeps the fallback without also catching
        # KeyboardInterrupt / SystemExit
        try:
            bb = ff.open(header)
        except Exception:
            bb = ff.create(header)

        with bb:
            for key in self.attrs:
                try:
                    bb.attrs[key] = self.attrs[key]
                except ValueError:
                    # values that cannot be stored directly are stored as
                    # JSON strings
                    try:
                        json_str = 'json://' + json.dumps(self.attrs[key],
                                                          cls=JSONEncoder)
                        bb.attrs[key] = json_str
                    except Exception:
                        raise ValueError(
                            "cannot save '%s' key in attrs dictionary" % key)

        for column, dataset in zip(columns, datasets):
            c = self[column]

            # save column attrs too
            with ff.create_from_array(dataset, c) as bb:
                if hasattr(c, 'attrs'):
                    for key in c.attrs:
                        bb.attrs[key] = c.attrs[key]
def main():
    """
    Build a reionization-redshift table from a FastPM snapshot.

    Reads the box size and scaling factor from the snapshot header, paints
    the particle positions onto a mesh, turns the result into an
    overdensity, applies the top-hat and ``Bk`` filters in Fourier space,
    and writes the resulting field plus an ``XYZ_bins`` block to
    ``ns.output``. All options come from the module-level parser ``ap``.
    """
    ns = ap.parse_args()
    comm = MPI.COMM_WORLD

    # the input file is closed once the particles have been painted; it
    # was previously left open
    with bigfile.BigFileMPI(comm, ns.fastpm) as ff:
        with ff['.'] as bb:
            BoxSize = bb.attrs['BoxSize'][0]
            Redshift = 1 / bb.attrs['ScalingFactor'][0] - 1

        Nmesh = int(BoxSize / ns.resolution * 2)
        # round it to 8.
        Nmesh -= Nmesh % 8

        if comm.rank == 0:
            logger.info("source = %s", ns.fastpm)
            logger.info("output = %s", ns.output)
            logger.info("BoxSize = %g", BoxSize)
            logger.info("Redshift = %g", Redshift)
            logger.info("Nmesh = %g", Nmesh)

        pm = ParticleMesh([Nmesh, Nmesh, Nmesh], BoxSize, comm=comm)
        real = RealField(pm)
        real[...] = 0

        with ff['Position'] as ds:
            logger.info(ds.size)
            # paint the particles chunk by chunk
            for offset in range(0, ds.size, ns.chunksize):
                pos = ds[offset:offset + ns.chunksize]
                layout = pm.decompose(pos)
                lpos = layout.exchange(pos)
                real.paint(lpos, hold=True)

    mean = real.cmean()
    if comm.rank == 0:
        logger.info("mean particle per cell = %s", mean)

    # convert the painted counts into an overdensity
    real[...] /= mean
    real[...] -= 1

    # named ``cfield`` so the builtin ``complex`` is not shadowed
    cfield = real.r2c()
    for k, _, slab in zip(cfield.slabs.x, cfield.slabs.i, cfield.slabs):
        k2 = sum(kd**2 for kd in k)
        # tophat
        f = tophat(ns.filtersize, k2**0.5)
        slab[...] *= f
        # zreion
        slab[...] *= Bk(k2**0.5)
        slab[...] *= (1 + Redshift)

    real = cfield.c2r()
    real[...] += Redshift

    mean = real.cmean()
    if comm.rank == 0:
        logger.info("zreion.mean = %s", mean)

    buffer = numpy.empty(real.size, real.dtype)
    real.sort(out=buffer)
    if comm.rank == 0:
        logger.info("sorted for output")

    with bigfile.BigFileMPI(comm, ns.output, create=True) as ff:
        with ff.create_from_array(ns.dataset, buffer) as bb:
            bb.attrs['BoxSize'] = BoxSize
            bb.attrs['Redshift'] = Redshift
            bb.attrs['TopHatFilterSize'] = ns.filtersize
            bb.attrs['Nmesh'] = Nmesh
        #
        # hack: compatible with current MPGadget. This is not really needed
        # we'll remove the bins later, since BoxSize and Nmesh are known.
        with ff.create("XYZ_bins", dtype='f8', size=Nmesh) as bb:
            if comm.rank == 0:
                # NOTE(review): the factor 1000. is presumably a unit
                # conversion of BoxSize -- confirm
                bins = numpy.linspace(0, BoxSize * 1000., Nmesh, dtype='f8')
                bb.write(0, bins)

    if comm.rank == 0:
        logger.info("done. written at %s", ns.output)
def generate_zreion_file(paramfile, output, redshift, resolution):
    """Do the work and output the file.

    This reads parameters from the MP-GenIC paramfile, generates a table of
    patchy reionization redshifts, and saves it. This table can be read by
    MP-Gadget.

    The core of the method is a correlation between large-scale overdensity
    and redshift of reionization calibrated from a radiative transfer
    simulation. To realise this, the code needs to know the large scale
    overdensity, which it does using FastPM.

    Arguments:
    - paramfile: genic parameter file to read from
    - output: file to save the reionization table to; must not exist yet
    - redshift: redshift for the midpoint of reionization
    - resolution: cell size of the particle grid, in the units of the
      converted box size (config BoxSize * UnitLength_in_cm / cm per Mpc).
      The grid has int(BoxSize / resolution) cells per side, rounded down
      to a multiple of 8; the same value is used as the top-hat filter
      size.

    Raises:
    - IOError if ``output`` already exists
    """
    config = configobj.ConfigObj(infile=paramfile,
                                 configspec=GenICconfigspec,
                                 file_error=True)
    # Input sanitisation
    # NOTE(review): the return value of validate() is not checked, so
    # validation failures are not reported here
    vtor = validate.Validator()
    config.validate(vtor)

    comm = MPI.COMM_WORLD
    logger = logging

    cm_per_mpc = 3.085678e24
    BoxSize = config["BoxSize"] * config["UnitLength_in_cm"] / cm_per_mpc
    Redshift = redshift
    Nmesh = int(BoxSize / resolution)
    # round it to 8.
    Nmesh -= Nmesh % 8
    # Top-hat filter the density field on one grid scale
    filtersize = resolution

    if comm.rank == 0:
        logger.info("output = %s", output)
        logger.info("BoxSize = %g", BoxSize)
        logger.info("Redshift = %g", Redshift)
        logger.info("Nmesh = %g", Nmesh)

    pm = ParticleMesh([Nmesh, Nmesh, Nmesh], BoxSize, comm=comm)

    # CDM density = total matter minus baryons minus the neutrino term
    mnu = numpy.array([config["MNue"], config["MNum"], config["MNut"]])
    omegacdm = (config["Omega0"] - config["OmegaBaryon"]
                - numpy.sum(mnu) / 93.14 / config["HubbleParam"]**2)
    cosmo = Cosmology(h=config["HubbleParam"], Omega0_cdm=omegacdm,
                      T0_cmb=config["CMBTemperature"])

    state = get_lpt(pm, Redshift, cosmo, config["Seed"])
    real = state.to_mesh()
    logger.info("field painted")

    mean = real.cmean()
    if comm.rank == 0:
        logger.info("mean 2lpt density = %s", mean)

    # convert the density into an overdensity
    real[...] /= mean
    real[...] -= 1

    cmplx = real.r2c()
    logger.info("field transformed")

    for k, _, slab in zip(cmplx.slabs.x, cmplx.slabs.i, cmplx.slabs):
        k2 = sum(kd**2 for kd in k)
        # tophat
        f = tophat(filtersize, k2**0.5)
        slab[...] *= f
        # zreion
        slab[...] *= Bofk(k2**0.5)
        slab[...] *= (1 + Redshift)

    real = cmplx.c2r()
    real[...] += Redshift
    logger.info("filters applied %d", real.size)

    mean = real.cmean()
    if comm.rank == 0:
        logger.info("zreion.mean = %s", mean)

    buffer = numpy.empty(real.size, real.dtype)
    real.ravel(out=buffer)
    if comm.rank == 0:
        logger.info("sorted for output")

    if os.path.exists(output):
        # build one message string: passing the path as a second argument
        # makes IOError treat the arguments as (errno, strerror)
        raise IOError("Refusing to write to existing file: %s" % output)

    with bigfile.BigFileMPI(comm, output, create=True) as ff:
        with ff.create_from_array("Zreion_Table", buffer) as bb:
            bb.attrs['BoxSize'] = BoxSize
            bb.attrs['Redshift'] = Redshift
            bb.attrs['TopHatFilterSize'] = filtersize
            bb.attrs['Nmesh'] = Nmesh
        #
        # hack: compatible with current MPGadget. This is not really needed
        # we'll remove the bins later, since BoxSize and Nmesh are known.
        with ff.create("XYZ_bins", dtype='f8', size=Nmesh) as bb:
            if comm.rank == 0:
                bins = numpy.linspace(0, BoxSize, Nmesh, dtype='f8')
                bb.write(0, bins)

    if comm.rank == 0:
        logger.info("done. written at %s", output)
def parallel_read(self, columns, full=False):
    """
    Generator yielding this rank's share of the requested columns, with
    the derived columns filled in.

    Each rank reads the range ``[rank * Ntot // size, (rank + 1) * Ntot //
    size)`` in bunches of ``self.bunchsize`` rows, or in one piece when
    ``full`` is true, and keeps yielding until every rank is done.

    Besides the columns stored in the file:

    - ``Velocity`` is multiplied by the header's ``RSDFactor``
    - ``Mass`` is never read; it is filled with ``self.M0``
    - ``InitialPosition`` is never read; it is rebuilt from ``ID``
    - when ``self.rsd`` is set, ``Position`` is shifted by ``Velocity``
      along that axis and wrapped into the box

    Parameters
    ----------
    columns : list of str
        names of the columns to return
    full : bool, optional
        if true, read the whole local range in a single bunch

    Yields
    ------
    list
        one entry per name in ``columns``, in the same order; ``None`` for
        columns that are neither in the file nor derived

    Raises
    ------
    ValueError
        if the header box size differs from ``self.BoxSize[0]``
    """
    with bigfile.BigFileMPI(self.comm, self.path) as f:
        # the header lives either in the root block or in 'header'
        try:
            header = f['.']
        except Exception:
            header = f['header']

        boxsize = header.attrs['BoxSize'][0]
        RSD = header.attrs['RSDFactor'][0]
        if boxsize != self.BoxSize[0]:
            raise ValueError("Box size mismatch, expecting %g" % boxsize)

        readcolumns = set(columns)
        if self.rsd is not None:
            # the velocity is needed to shift the positions
            readcolumns = set(columns + ['Velocity'])
        if 'InitialPosition' in columns:
            # derived from the particle ID rather than read
            readcolumns.add('ID')
            readcolumns.remove('InitialPosition')
        if 'Mass' in readcolumns:
            readcolumns.remove('Mass')

        # remove columns not in the file (None will be returned)
        for col in list(readcolumns):
            if col not in f:
                readcolumns.remove(col)

        # the dataset and this rank's range do not change between bunches,
        # so set them up once
        dataset = bigfile.BigData(f, readcolumns)
        Ntot = dataset.size
        start = self.comm.rank * Ntot // self.comm.size
        end = (self.comm.rank + 1) * Ntot // self.comm.size

        if 'InitialPosition' in columns:
            # search near the rounded cube root for nc with nc**3 equal to
            # self.size; if none matches, the last candidate is kept
            guess = int(self.size**(1. / 3) + 0.5)
            for nc in range(guess - 10, guess + 10):
                if nc**3 == self.size:
                    break

        done = False
        i = 0
        while not numpy.all(self.comm.allgather(done)):
            if full:
                bunchstart, bunchend = start, end
            else:
                # clip both edges to the end of the local range
                bunchstart = min(start + i * self.bunchsize, end)
                bunchend = min(start + (i + 1) * self.bunchsize, end)

            if bunchend == end:
                done = True

            P = {}
            for column in readcolumns:
                P[column] = dataset[column][bunchstart:bunchend]

            if 'Velocity' in P:
                P['Velocity'] *= RSD

            if 'Mass' in columns:
                P['Mass'] = numpy.ones(bunchend - bunchstart,
                                       dtype='u1') * self.M0

            if self.rsd is not None:
                axis = "xyz".index(self.rsd)
                P['Position'][:, axis] += P['Velocity'][:, axis]
                P['Position'][:, axis] %= self.BoxSize[axis]

            if 'InitialPosition' in columns:
                P['InitialPosition'] = numpy.empty((len(P['ID']), 3), 'f4')
                # peel off the grid index of each axis from the ID, last
                # axis first, working on a copy
                ids = P['ID'].copy()
                for d in [2, 1, 0]:
                    P['InitialPosition'][:, d] = ids % nc
                    ids[:] //= nc
                cellsize = self.BoxSize[0] / nc
                # add half a cell, then scale indices to box units
                P['InitialPosition'][:] += 0.5
                P['InitialPosition'][:] *= cellsize

            i = i + 1
            yield [P.get(column, None) for column in columns]