def compare_fields(newsnap, oldsnap, ptype=1, field="Position"):
    """Compare two fields in a snapshot (by default DM positions): newsnap and oldsnap are compared.
    The 'field' array is compared for particle type 'ptype'.
    Returns the absolute value of the differences."""
    pp_old = bigfile.BigFile(oldsnap)
    box = pp_old["Header"].attrs["BoxSize"]
    otime = pp_old["Header"].attrs["Time"]
    pp_new = bigfile.BigFile(newsnap)
    ntime = pp_new["Header"].attrs["Time"]
    nbox = pp_new["Header"].attrs["BoxSize"]
    assert np.abs(otime - ntime) < 1e-8
    assert np.abs(box - nbox) < 1e-8
    sptype = str(ptype)
    id_new = pp_new[sptype + "/ID"][:]
    id_old = pp_old[sptype + "/ID"][:]
    pos_new = pp_new[sptype + "/" + field][:]
    pos_old = pp_old[sptype + "/" + field][:]
    #Sort both arrays by particle ID so we compare the same particles.
    p_sort_new = pos_new[np.argsort(id_new)]
    p_sort_old = pos_old[np.argsort(id_old)]
    diff = p_sort_new - p_sort_old
    #Positions wrap around the periodic box, so map differences into [-box/2, box/2].
    if field == "Position":
        ii = np.where(diff > box / 2)
        diff[ii] = diff[ii] - box
        ii = np.where(diff < -box / 2)
        diff[ii] = diff[ii] + box
    return np.abs(diff)
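
# A minimal usage sketch (not from the original source; the snapshot paths and
# the __main__ guard are hypothetical): compare the dark matter positions of
# two snapshots and report the largest wrapped difference.
if __name__ == "__main__":
    diff = compare_fields("run_new/output/PART_005", "run_old/output/PART_005",
                          ptype=1, field="Position")
    print("Maximum absolute position difference:", diff.max())
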
def complex_to_fastpm(fn, ds, complex, BoxSize):
    """
    >>> c = numpy.fft.rfftn(numpy.random.normal(size=(128, 128, 128)))
    >>> complex_to_fastpm("IC", "Copy", c, 128.)
    >>> a = bigfile.BigFile("IC")['Copy'][:]
    >>> print(a[1], c.ravel()[1])
    >>> print(a[300], c.ravel()[300])
    """
    import bigfile
    import numpy
    bf = bigfile.BigFile(fn, create=True)
    bb = bf.create(ds, complex.dtype, complex.size, Nfile=1)
    Nmesh = complex.shape[0]
    bb.attrs['Nmesh'] = Nmesh
    bb.attrs['BoxSize'] = BoxSize
    # This ensures we write the array as C-contiguous.
    bb.write(0, complex)
    # Thus we set the strides and shape accordingly for FastPM to pick up.
    bb.attrs['ndarray.ndim'] = 3
    bb.attrs['ndarray.shape'] = (Nmesh, Nmesh, Nmesh)
    bb.attrs['ndarray.strides'] = (Nmesh * (Nmesh // 2 + 1), Nmesh // 2 + 1, 1)

def write_big_file(bfname, hdf5name):
    """Find all the HDF5 files in the snapshot and merge them into a bigfile."""
    #Find all the HDF5 files in the snapshot set.
    hdf5_files = glob.glob(hdf5name)
    if len(hdf5_files) == 0:
        hdf5_files = glob.glob(hdf5name + ".*.hdf5")
    elif os.path.isdir(hdf5_files[0]):
        hdf5_files = glob.glob(os.path.join(hdf5name, "*_[0-9][0-9][0-9].*.hdf5"))
    if len(hdf5_files) == 0:
        raise IOError("Could not find HDF5 snapshot as %s (.*.hdf5)" % hdf5name)
    #Sort so we get a consistent answer each time.
    hdf5_files = sorted(hdf5_files)
    if not h5py.is_hdf5(hdf5_files[0]):
        raise IOError("%s is not hdf5!" % hdf5_files[0])
    hdf5 = h5py.File(hdf5_files[0], 'r')
    bf = bigfile.BigFile(bfname, create=True)
    atime = write_bigfile_header(hdf5, bf)
    for n in range(6):
        bf.create(str(n))
    create_big_file_arrays(bf, hdf5)
    hdf5.close()
    startpart = np.zeros(6, dtype=np.int64)
    for hfile in hdf5_files:
        startpart = write_bf_segment(bf, hfile, startpart, atime)
        print("Copied HDF file %s" % hfile)
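
# Hedged usage sketch (the paths are hypothetical placeholders, and the helpers
# write_bigfile_header, create_big_file_arrays and write_bf_segment are assumed
# to be defined elsewhere in this module): merge a multi-part Gadget HDF5
# snapshot into one bigfile snapshot directory.
if __name__ == "__main__":
    write_big_file("output/PART_005", "snapdir_005/snap_005")
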
def open(cls, filename, pool=None, header_kwargs=dict(), **kwargs):
    if bigfile is None:
        raise ImportError("bigfile must be installed!")
    fp = bigfile.BigFile(cls.buildFilename(filename, pool, **kwargs))
    return cls(fp, pool, header_kwargs=header_kwargs)

def get_particle_property_within_groups(output_path, particle_property, p_type, desired_redshift, group_index):
    output_redshift, output_snapshot = desired_redshift_to_output_redshift(output_path, desired_redshift)
    pig = bigfile.BigFile(output_path + 'PIG_%s' % output_snapshot)
    GroupID = pig.open('%d/GroupID' % (p_type))[:]
    finalproperty = pig.open('%d/%s' % (p_type, particle_property))[:]
    return finalproperty[GroupID == GroupID[group_index]], output_redshift

def get_box_size(output_path):
    output_file_names = os.listdir(output_path)
    for name in output_file_names:
        if ('PIG' in name):
            snapshot_number = name[4:]
            pig = bigfile.BigFile(output_path + 'PIG_%s' % snapshot_number)
            header = pig.open('/Header')
            return header.attrs['BoxSize'][0]

def __init__(self, num, base):
    fname = base
    snap = str(num).rjust(3, '0')
    new_fname = os.path.join(base, "PART_" + snap)
    #Check for snapshot directory
    if os.path.exists(new_fname):
        fname = new_fname
    self._f_handle = bigfile.BigFile(fname, 'r')
    if "Header" not in self._f_handle.blocks:
        raise IOError("No BigFile snapshot at", new_fname)
    AbstractSnapshot.__init__(self)

def test_save_state():
    """ Tests the BigFile saving function """
    klin = np.loadtxt('flowpm/data/Planck15_a1p00.txt').T[0]
    plin = np.loadtxt('flowpm/data/Planck15_a1p00.txt').T[1]
    ipklin = iuspline(klin, plin)
    a0 = 0.1
    nc = [16, 16, 16]
    boxsize = [100., 100., 100.]
    cosmo = flowpm.cosmology.Planck15()

    initial_conditions = flowpm.linear_field(
        nc,        # size of the cube
        boxsize,   # Physical size of the cube
        ipklin,    # Initial powerspectrum
        batch_size=2)

    # Sample particles
    state = flowpm.lpt_init(cosmo, initial_conditions, a0)

    with tempfile.TemporaryDirectory() as tmpdirname:
        filename = tmpdirname + '/testsave'
        save_state(cosmo, state, a0, nc, boxsize, filename)

        # Now try to reload the information using BigFile
        bf = bigfile.BigFile(filename)

        # Testing recovery of header
        header = bf['Header']
        assert_allclose(np.array(header.attrs['NC']), np.array(nc))
        assert_allclose(np.array(header.attrs['BoxSize']), np.array(boxsize))
        assert_allclose(np.array(header.attrs['OmegaCDM']), np.array(cosmo.Omega_c))
        assert_allclose(np.array(header.attrs['OmegaB']), np.array(cosmo.Omega_b))
        assert_allclose(np.array(header.attrs['OmegaK']), np.array(cosmo.Omega_k))
        assert_allclose(np.array(header.attrs['h']), np.array(cosmo.h))
        assert_allclose(np.array(header.attrs['Sigma8']), np.array(cosmo.sigma8))
        assert_allclose(np.array(header.attrs['w0']), np.array(cosmo.w0))
        assert_allclose(np.array(header.attrs['wa']), np.array(cosmo.wa))
        assert_allclose(np.array(header.attrs['Time']), np.array(a0))

        # Testing recovery of data
        pos = bf['1/Position']
        assert_allclose(pos[:], state[0, 1].numpy() / nc[0] * boxsize[0])
        vel = bf['1/Velocity']
        assert_allclose(vel[:], state[1, 1].numpy() / nc[0] * boxsize[0])

        # Closing file
        bf.close()

def __init__(self, cachedir, aliases):
    import bigfile
    self.cachedir = cachedir
    with bigfile.BigFile(cachedir, create=True) as bf:
        bd = bigfile.BigData(bf)
        self._size = bd.size
        self._dtype = bd.dtype
    self.aliases = dict([(new, (old, transform)) for old, new, transform in aliases])
    ColumnStore.__init__(self)

def read_ptype(self, ptype, columns, full):
    f = bigfile.BigFile(self.path)
    done = False
    i = 0
    while not numpy.all(self.comm.allgather(done)):
        ret = []
        for column in columns:
            f = bigfile.BigFile(self.path)
            read_column = column
            if self.subsample:
                if ptype in ("0", "1"):
                    read_column = read_column + '.sample'
            if ptype == 'FOFGroups':
                if column == 'Position':
                    read_column = 'MassCenterPosition'
                if column == 'Velocity':
                    read_column = 'MassCenterVelocity'
            cdata = f['%s/%s' % (ptype, read_column)]
            Ntot = cdata.size
            start = self.comm.rank * Ntot // self.comm.size
            end = (self.comm.rank + 1) * Ntot // self.comm.size
            if not full:
                bunchstart = start + i * self.bunchsize
                bunchend = start + (i + 1) * self.bunchsize
                if bunchend > end:
                    bunchend = end
                if bunchstart > end:
                    bunchstart = end
            else:
                bunchstart = start
                bunchend = end
            if bunchend == end:
                done = True
            data = cdata[bunchstart:bunchend]
            ret.append(data)
        i = i + 1
        yield ret

def read(self, columns, start, stop, step=1):
    """
    Read the specified column(s) over the given range,
    as a dictionary.

    'start' and 'stop' should be between 0 and :attr:`size`,
    which is the total size of the binary file (in particles)
    """
    import bigfile
    if isinstance(columns, string_types):
        columns = [columns]

    with bigfile.BigFile(filename=self.path)[self.dataset] as f:
        ds = bigfile.BigData(f, columns)
        return ds[start:stop][::step]

def write_all_hdf_files(hdf5name, bfname):
    """Work out which particle set goes to which HDF5 file and write it."""
    bf = bigfile.BigFile(bfname, 'r')
    nfiles = compute_nfiles(bf["Header"].attrs["TotNumPart"])
    if not os.path.exists(hdf5name):
        os.mkdir(hdf5name)
    mm = re.search("PART_([0-9]*)", bfname)
    nsnap = '000'
    if len(mm.groups()) > 0:
        nsnap = mm.groups()[0]
    hdf5name = os.path.join(hdf5name, "snap_" + nsnap)
    print("Writing %d hdf snapshot files to %s" % (nfiles, hdf5name))
    for nn in range(nfiles):
        write_hdf_file(bf, hdf5name, nn, nfiles)
        print("Wrote file %d" % nn)

def load_bigfile(filename, dataset):
    """
    Load a bigfile using ``bigfile``, returning a ``numpy`` array

    Parameters
    ----------
    filename : str
        the path of the bigfile to load
    dataset : str
        the name of the dataset (column) to load

    Returns
    -------
    array_like :
        the loaded data
    """
    import bigfile
    return bigfile.BigFile(filename)[dataset][...]
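
# Hedged usage sketch (the catalogue path and column name are hypothetical
# examples, not taken from the source): load one column of a bigfile catalogue.
if __name__ == "__main__":
    halo_mass = load_bigfile("output/PIG_005", "FOFGroups/Mass")
    print("Read", len(halo_mass), "halo masses")
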
def build(self):
    files = self.listfiles()
    bf = bigfile.BigFile(self.destdir, create=True)
    fulldtype = fits.read_table(files[0]).dtype
    dtype = [(column, fulldtype[column]) for column in self.columns
             if column != 'BRICK_PRIMARY']
    dtype.append(('BRICK_PRIMARY', '?'))
    dtype = numpy.dtype(dtype)
    sizes = []
    for filename in files:
        sizes.append(fits.size_table(filename))
    sizes = numpy.array(sizes)
    offsets = numpy.concatenate([[0], numpy.cumsum(sizes)])
    print("total number of objects:", sizes.sum())
    blocks = {}
    for column in self.columns:
        blocks[column] = bf.create(column, dtype=dtype[column], size=sizes.sum(), Nfile=1)
    for i, filename in enumerate(files):
        onefile = fits.read_table(filename)
        for column in self.columns:
            onedata = numpy.empty(len(onefile), dtype=dtype[column])
            if column != 'BRICK_PRIMARY':
                onedata[...] = onefile[column]
            else:
                try:
                    onedata[...] = onefile[column]
                except Exception:
                    # Fall back to True if the column cannot be read.
                    onedata[...] = True
            blocks[column].write(offsets[i], onedata)
        print(filename, 'done')

def __init__(self, path, exclude=None, header=Automatic, dataset='./'):
    if not dataset.endswith('/'):
        dataset = dataset + '/'

    import bigfile
    self.dataset = dataset
    self.path = path

    # store the attributes
    self.attrs = {}

    # the file path
    with bigfile.BigFile(filename=path) as ff:
        columns = ff[self.dataset].blocks
        if header is Automatic:
            for header in ['Header', 'header', './']:
                if header in columns:
                    break

        if exclude is None:
            exclude = [header]

        columns = list(set(columns) - set(exclude))

        ds = bigfile.BigData(ff[self.dataset], columns)

        # set the data type and size
        self.dtype = ds.dtype
        self.size = ds.size

        header = ff[header]
        attrs = header.attrs

        # copy over the attrs
        for k in attrs.keys():
            # load a JSON representation if str starts with json://
            if isinstance(attrs[k], string_types) and attrs[k].startswith('json://'):
                self.attrs[k] = json.loads(attrs[k][7:], cls=JSONDecoder)
            # copy over an array
            else:
                self.attrs[k] = numpy.array(attrs[k], copy=True)
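
# Hedged sketch of the "json://" attribute convention handled above. This is an
# assumption about how such attributes might be written, not code from the
# source; the function name and attribute key are hypothetical.
def _write_json_attr_example(path):
    import json
    import bigfile
    with bigfile.BigFile(path, create=True) as ff:
        with ff.create("Header") as hdr:
            # Stored as a string attribute; the reader above strips the
            # "json://" prefix and decodes the remainder with json.loads.
            hdr.attrs['cosmology'] = 'json://' + json.dumps({"h": 0.7, "Om0": 0.3})
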
def __init__(self, num, base, comm=None):
    self.comm = comm
    if self.comm is not None:
        self.rank = comm.Get_rank()
        self.size = comm.Get_size()
    else:
        self.size = 1
        self.rank = 0
    self.parts_rank = None
    fname = base
    snap = str(num).rjust(3, '0')
    new_fname = os.path.join(base, "PART_" + snap)
    #Check for snapshot directory
    if os.path.exists(new_fname):
        fname = new_fname
    self._f_handle = bigfile.BigFile(fname, 'r')
    if "Header" not in self._f_handle.blocks:
        raise IOError("No BigFile snapshot at", new_fname)
    AbstractSnapshot.__init__(self)

def parallel_read(self, columns, full=False):
    f = bigfile.BigFile(self.path)
    header = f['header']
    boxsize = header.attrs['BoxSize'][0]
    ptypes = self.ptypes
    readcolumns = []
    for column in columns:
        if column == 'HI':
            if 'Mass' not in readcolumns:
                readcolumns.append('Mass')
            if 'NeutralHydrogenFraction' not in readcolumns:
                readcolumns.append('NeutralHydrogenFraction')
        else:
            readcolumns.append(column)
    readcolumns = readcolumns + self.load
    for ptype in ptypes:
        for data in self.read_ptype(ptype, readcolumns, full):
            P = dict(zip(readcolumns, data))
            if 'HI' in columns:
                P['HI'] = P['NeutralHydrogenFraction'] * P['Mass']
            if 'Position' in columns:
                P['Position'][:] *= self.BoxSize / boxsize
                P['Position'][:] %= self.BoxSize
            if 'Velocity' in columns:
                raise NotImplementedError
            if self.select is not None:
                mask = self.select.get_mask(P)
            else:
                mask = Ellipsis
            toret = []
            for column in columns:
                d = P.get(column, None)
                if d is not None:
                    d = d[mask]
                toret.append(d)
            yield toret

def get_snapshot_redshift_correspondence(output_path):
    output_file_names = os.listdir(output_path)
    snapshot_space = []
    redshift_space = []
    for name in output_file_names:
        if ('PIG' in name):
            try:
                snapshot_number = name[4:]
                snapshot_space.append(snapshot_number)
            except:
                print("Warning: Ignoring filename:%s" % name)
    snapshot_space = numpy.sort(numpy.array(snapshot_space))
    for snapshot_number in snapshot_space:
        pig = bigfile.BigFile(output_path + 'PIG_%s' % snapshot_number)
        header = pig.open('/Header')
        redshift = 1. / header.attrs['Time'][0] - 1
        redshift_space.append(redshift)
    return numpy.array(snapshot_space), numpy.array(redshift_space)

def HMFFromFOF(foftable, h0=False, bins='auto'):
    """Compute a conventionally normalised halo mass function from the FOF tables.
    Units returned are dn/dM in 1/(M_sun Mpc^3) (comoving). Note no little-h!
    If h0 == True, units are dn/dM in h^4/(M_sun Mpc^3).
    bins specifies the number of evenly spaced bins if an integer,
    or one of the strings understood by numpy.histogram."""
    bf = bigfile.BigFile(foftable)
    #1 solar mass in g
    msun_in_g = 1.989e33
    #1 Mpc in cm
    Mpc_in_cm = 3.085678e+24
    #In units of 10^10 M_sun by default.
    try:
        imass_in_g = bf["Header"].attrs["UnitMass_in_g"]
    except KeyError:
        imass_in_g = 1.989e43
    #Length in units of kpc/h by default
    try:
        ilength_in_cm = bf["Header"].attrs["UnitLength_in_cm"]
    except KeyError:
        ilength_in_cm = 3.085678e+21
    hub = bf["Header"].attrs["HubbleParam"]
    box = bf["Header"].attrs["BoxSize"]
    #Convert to Mpc from kpc/h:
    box *= ilength_in_cm / hub / Mpc_in_cm
    masses = bf["FOFGroups/Mass"][:]
    #This is N(M) evenly spaced in log(M)
    NM, Mbins = np.histogram(np.log10(masses), bins=bins)
    #Convert Mbins to Msun
    Mbins = 10**Mbins
    Mbins *= (imass_in_g / msun_in_g)
    #Divide the counts by the bin width dM: this is dn/dM (per Msun)
    dndm = NM / (Mbins[1:] - Mbins[:-1])
    Mcent = (Mbins[1:] + Mbins[:-1]) / 2.
    #Now divide by the volume:
    dndm /= box**3
    if h0:
        dndm /= hub**4
    return Mcent, dndm
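
# Hedged usage sketch (the FOF table path is a hypothetical placeholder):
# tabulate the halo mass function from a PIG output.
if __name__ == "__main__":
    Mcent, dndm = HMFFromFOF("output/PIG_005", h0=False, bins=50)
    for m, n in zip(Mcent, dndm):
        print("%10.4e  %10.4e" % (m, n))
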
def temporary_data():
    import bigfile
    data = numpy.empty(1024, dtype=[('Position', ('f8', 3)), ('Velocity', ('f8', 3))])
    data['Position'] = numpy.random.random(size=(1024, 3))
    data['Velocity'] = numpy.random.random(size=(1024, 3))
    tmpdir = tempfile.mkdtemp()
    try:
        with bigfile.BigFile(tmpdir, create=True) as tmpff:
            with tmpff.create("Position", dtype=('f4', 3), size=1024) as bb:
                bb.write(0, data['Position'])
            with tmpff.create("Velocity", dtype=('f4', 3), size=1024) as bb:
                bb.write(0, data['Velocity'])
            with tmpff.create("Header") as bb:
                bb.attrs['Size'] = 1024.
        yield (data, tmpdir)
    finally:
        # Always clean up the temporary directory, even if the consumer raises.
        shutil.rmtree(tmpdir)

def get_group_property(output_path, group_property, desired_redshift):
    output_redshift, output_snapshot = desired_redshift_to_output_redshift(output_path, desired_redshift)
    pig = bigfile.BigFile(output_path + 'PIG_%s' % output_snapshot)
    return pig.open('FOFGroups/%s' % (group_property))[:], output_redshift

def threshold(snapshot, a0=0, dim=3):
    """Calculates the KDE for halo positions, retains those in regions with KDE greater than
    the mean, and saves them into a file. Also saves a file with the smoothing length used
    and the boxsize (for use with the scms algorithm).
    Inputs are the PIG snapshot path (from the run directory), the smoothing length factor
    (0 means use the default smoothing of 2 Mpc), and the number of dimensions (defaults to 3)."""

    #--------------- Define function to handle the periodicity of the box
    # the input array should be separations between objects
    def perio(sep, boxsize):
        # if the distance between particles is greater than half the boxsize,
        # change to the periodic distance (i.e. "off the map")
        sep[np.where(sep > boxsize / 2.)] = -boxsize + sep[np.where(sep > boxsize / 2.)]
        # if the distance between particles is less than half the negative boxsize,
        # change to the periodic distance (i.e. "off the map")
        sep[np.where(sep < -boxsize / 2.)] = boxsize + sep[np.where(sep < -boxsize / 2.)]
        return sep

    #--------------- Import the snapshot parameters and make halo position arrays
    pig = bigfile.BigFile(snapshot)
    x = pig["FOFGroups/MassCenterPosition"][:][:, 0]  # in kpc
    y = pig["FOFGroups/MassCenterPosition"][:][:, 1]
    z = pig["FOFGroups/MassCenterPosition"][:][:, 2]
    n_halos = x.size  # number of halos
    boxsize = pig["Header"].attrs["BoxSize"][0]  # boxsize in kpc
    print("Number of Halos:", n_halos)

    #--------------- Calculate the (Gaussian) kernel density estimator
    start = time.time()
    if a0 != 0:
        sig = np.min((np.std(x), np.std(y), np.std(z)))
        # calculated smoothing length
        smoothing = a0 / ((dim + 2)**(1. / (dim + 4))) * n_halos**(-1. / (dim + 4)) * sig
    else:
        smoothing = 2000.  # good generic value for a 64 Mpc box
    kde = np.zeros(n_halos)
    for i in range(n_halos):
        # distance between ith halo and all halos (squared)
        dist = perio(x[i] - x, boxsize)**2 + perio(y[i] - y, boxsize)**2 + perio(z[i] - z, boxsize)**2
        kde[i] = np.sum(np.exp(-dist / (2. * smoothing**2)))
    end = time.time()
    print("Time for KDE calculation: ", np.round((end - start) / 60, 3), "minutes")

    #--------------- Cut out particles in the lowest density regions
    tau = np.mean(kde)  # cutoff threshold for density
    cuts = np.where(kde > tau)[0]  # indices for positions that meet criteria
    n_halos = cuts.size
    print('% Halos Remaining:', np.round(100. * n_halos / x.size, 1))

    #--------------- Save retained halo positions and masses into a file
    halos = np.array([x[cuts], y[cuts], z[cuts]])
    masses = pig["FOFGroups/Mass"][:][cuts]
    df1 = pd.DataFrame({'x': halos[0], 'y': halos[1], 'z': halos[2]})
    df2 = pd.DataFrame({'mass': masses})
    dff = pd.concat([df1, df2], ignore_index=False, axis=1)
    dff.to_csv(snapshot + 'threshold.out', sep=',', na_rep=-1, index=False)

    #--------------- Save smoothing length and boxsize into file
    np.savetxt(snapshot + 'params.out', np.array([smoothing, boxsize]))
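
# Hedged usage sketch (the snapshot path is a hypothetical placeholder): run the
# KDE threshold on a PIG snapshot with the default 2 Mpc smoothing, writing
# 'threshold.out' and 'params.out' next to the snapshot.
if __name__ == "__main__":
    threshold("output/PIG_005/", a0=0, dim=3)
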
def load_snapshot_header(output_path, desired_redshift):
    output_redshift, snapshot_number = desired_redshift_to_output_redshift(output_path, desired_redshift)
    pig = bigfile.BigFile(output_path + 'PIG_%s' % snapshot_number)
    header = pig.open('/Header')
    return header

        return r, DD
    else:
        return None, None


if (z == 8):
    pig = '086'
if (z == 9):
    pig = '066'
if (z == 10):
    pig = '054'
if (z == 7.5):
    pig = '141'

galaxy = bigfile.BigFile(
    '/nfs/nas-0-1/akbhowmi/my_galaxy_sample_bluetides_closest_comoving_distance_with_luminosity/'
    + pig + '/')
SM = galaxy.open('galaxy_stellar_mass')[:]
HM = galaxy.open('galaxy_host_mass')[:]
hostid = galaxy.open('galaxy_host_id')[:]
tag = galaxy.open('central_satellite_tag')[:]
galaxy_positions = galaxy.open('galaxy_center_of_mass')[:]

lcuts = numpy.array([7.5, 8.0, 8.5, 9.0, 9.5, 10.0, 10.5])
lcuts += 0.25
for lcut in reversed(lcuts):
    mask = SM > 10**lcut
    data_s = galaxy_positions[mask]
    N = len(data_s)

def get_particle_property(output_path, particle_property, p_type, desired_redshift):
    output_redshift, output_snapshot = desired_redshift_to_output_redshift(output_path, desired_redshift)
    pig = bigfile.BigFile(output_path + 'PIG_%s' % output_snapshot)
    return pig.open('%d/%s' % (p_type, particle_property))[:], output_redshift
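
# Hedged usage sketch (the output path, particle type and property are
# hypothetical examples): read gas positions from the PIG output closest to z = 2.
if __name__ == "__main__":
    pos, z_out = get_particle_property("output/", "Position", 0, 2.0)
    print("Snapshot redshift:", z_out, "particles:", len(pos))
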
def fetch(self, column, start, end):
    import bigfile
    with bigfile.BigFile(self.cachedir) as bf:
        return bf[column][start:end]