Example #1
def compare_fields(newsnap, oldsnap, ptype=1, field="Position"):
    """Compare two fields in a snapshot (by default DM positions):
       newsnap and oldsnap are compared.
       the 'field' array is compared for particle type 'ptype'.
       Returns the absolute value of the differences."""
    pp_old = bigfile.BigFile(oldsnap)
    box = pp_old["Header"].attrs["BoxSize"]
    otime = pp_old["Header"].attrs["Time"]
    pp_new = bigfile.BigFile(newsnap)
    ntime = pp_new["Header"].attrs["Time"]
    nbox = pp_new["Header"].attrs["BoxSize"]
    assert np.abs(otime - ntime) < 1e-8
    assert np.abs(box - nbox) < 1e-8
    sptype = str(ptype)
    id_new = pp_new[sptype + "/ID"][:]
    id_old = pp_old[sptype + "/ID"][:]
    pos_new = pp_new[sptype + "/" + field][:]
    pos_old = pp_old[sptype + "/" + field][:]
    p_sort_new = pos_new[np.argsort(id_new)]
    p_sort_old = pos_old[np.argsort(id_old)]
    diff = p_sort_new - p_sort_old
    #Positions wrap around the box, so map differences back into [-box/2, box/2]
    if field == "Position":
        ii = np.where(diff > box / 2)
        diff[ii] = diff[ii] - box
        ii = np.where(diff < -box / 2)
        diff[ii] = diff[ii] + box
    return np.abs(diff)
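A minimal usage sketch for compare_fields (the snapshot paths here are placeholders, not from the original source): compare DM positions between two runs of the same simulation and report the largest wrapped difference.

import numpy as np

# Hypothetical paths to two bigfile snapshots written at the same scale factor.
diff = compare_fields("run_new/output/PART_005", "run_old/output/PART_005",
                      ptype=1, field="Position")
print("max |dx| = %g, mean |dx| = %g" % (np.max(diff), np.mean(diff)))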
Example #2
def complex_to_fastpm(fn, ds, complex, BoxSize):
    """
        >>> c = numpy.fft.rfftn(numpy.random.normal(size=(128, 128, 128)))

        >>> complex_to_fastpm("IC", "Copy", c, 128.)

        >>> a = bigfile.BigFile("IC")['Copy'][:]
        >>> print(a[1], c.ravel()[1])
        >>> print(a[300], c.ravel()[300])
    """
    import bigfile
    import numpy

    bf = bigfile.BigFile(fn, create=True)
    bb = bf.create(ds, complex.dtype, complex.size, Nfile=1)
    Nmesh = complex.shape[0]

    bb.attrs['Nmesh'] = Nmesh
    bb.attrs['BoxSize'] = BoxSize
    # This ensures we write the array as C-Contiguous
    bb.write(0, complex)
    # Thus we set the strides and shape accordingly for FastPM to pick up.
    bb.attrs['ndarray.ndim'] = 3
    bb.attrs['ndarray.shape'] = (Nmesh, Nmesh, Nmesh)
    bb.attrs['ndarray.strides'] = (Nmesh * (Nmesh // 2 + 1), Nmesh // 2 + 1, 1)
Example #3
def write_big_file(bfname, hdf5name):
    """Find all the HDF5 files in the snapshot and merge them into a bigfile."""
    #Find all the HDF5 snapshot set.
    hdf5_files = glob.glob(hdf5name)
    if len(hdf5_files) == 0:
        hdf5_files = glob.glob(hdf5name + ".*.hdf5")
    elif os.path.isdir(hdf5_files[0]):
        hdf5_files = glob.glob(
            os.path.join(hdf5name, "*_[0-9][0-9][0-9].*.hdf5"))
    if len(hdf5_files) == 0:
        raise IOError("Could not find hdF5 snapshot as %s (.*.hdf5)" %
                      hdf5name)
    #Sort so we get a consistent answer each time.
    hdf5_files = sorted(hdf5_files)
    if not h5py.is_hdf5(hdf5_files[0]):
        raise IOError("%s is not hdf5!" % hdf5_files[0])
    hdf5 = h5py.File(hdf5_files[0], 'r')
    bf = bigfile.BigFile(bfname, create=True)
    atime = write_bigfile_header(hdf5, bf)
    for n in range(6):
        bf.create(str(n))
    create_big_file_arrays(bf, hdf5)
    hdf5.close()
    startpart = np.zeros(6, dtype=np.int64)
    for hfile in hdf5_files:
        startpart = write_bf_segment(bf, hfile, startpart, atime)
        print("Copied HDF file %s" % hfile)
Example #4
    def open(cls, filename, pool=None, header_kwargs=dict(), **kwargs):

        if bigfile is None:
            raise ImportError("bigfile must be installed!")

        fp = bigfile.BigFile(cls.buildFilename(filename, pool, **kwargs))
        return cls(fp, pool, header_kwargs=header_kwargs)
Example #5
def get_particle_property_within_groups(output_path, particle_property, p_type,
                                        desired_redshift, group_index):
    output_redshift, output_snapshot = desired_redshift_to_output_redshift(
        output_path, desired_redshift)
    pig = bigfile.BigFile(output_path + 'PIG_%s' % output_snapshot)
    GroupID = pig.open('%d/GroupID' % (p_type))[:]
    finalproperty = pig.open('%d/%s' % (p_type, particle_property))[:]
    return finalproperty[GroupID == GroupID[group_index]], output_redshift
Example #6
def get_box_size(output_path):
    output_file_names = os.listdir(output_path)
    snapshot_space = []
    redshift_space = []
    for name in output_file_names:
        if ('PIG' in name):
            snapshot_number = name[4:]
            pig = bigfile.BigFile(output_path + 'PIG_%s' % snapshot_number)
            header = pig.open('/Header')
            return header.attrs['BoxSize'][0]
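A hedged usage sketch for the two helpers above; the output path is a placeholder and the desired_redshift_to_output_redshift helper they call is assumed to be defined elsewhere in the same module.

# Hypothetical run directory containing PIG_* group catalogues.
output_path = "/path/to/run/output/"
boxsize = get_box_size(output_path)  # BoxSize taken from the first PIG header found
# Masses of type-4 (star) particles in the same group as member index 0.
star_mass, z = get_particle_property_within_groups(
    output_path, "Mass", 4, desired_redshift=2.0, group_index=0)
print("BoxSize:", boxsize, "- group 0 holds", len(star_mass), "star particles at z =", z)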
Example #7
    def __init__(self, num, base):
        fname = base
        snap = str(num).rjust(3, '0')
        new_fname = os.path.join(base, "PART_" + snap)
        #Check for snapshot directory
        if os.path.exists(new_fname):
            fname = new_fname
        self._f_handle = bigfile.BigFile(fname, 'r')
        if "Header" not in self._f_handle.blocks:
            raise IOError("No BigFile snapshot at %s" % new_fname)
        AbstractSnapshot.__init__(self)
Example #8
def test_save_state():
    """Tests the BigFile saving function."""
    klin = np.loadtxt('flowpm/data/Planck15_a1p00.txt').T[0]
    plin = np.loadtxt('flowpm/data/Planck15_a1p00.txt').T[1]
    ipklin = iuspline(klin, plin)
    a0 = 0.1
    nc = [16, 16, 16]
    boxsize = [100., 100., 100.]
    cosmo = flowpm.cosmology.Planck15()

    initial_conditions = flowpm.linear_field(
        nc,  # size of the cube
        boxsize,  # Physical size of the cube
        ipklin,  # Initial powerspectrum
        batch_size=2)

    # Sample particles
    state = flowpm.lpt_init(cosmo, initial_conditions, a0)

    with tempfile.TemporaryDirectory() as tmpdirname:
        filename = tmpdirname + '/testsave'
        save_state(cosmo, state, a0, nc, boxsize, filename)

        # Now try to reload the information using BigFile
        bf = bigfile.BigFile(filename)

        # Testing recovery of header
        header = bf['Header']
        assert_allclose(np.array(header.attrs['NC']), np.array(nc))
        assert_allclose(np.array(header.attrs['BoxSize']), np.array(boxsize))
        assert_allclose(np.array(header.attrs['OmegaCDM']),
                        np.array(cosmo.Omega_c))
        assert_allclose(np.array(header.attrs['OmegaB']),
                        np.array(cosmo.Omega_b))
        assert_allclose(np.array(header.attrs['OmegaK']),
                        np.array(cosmo.Omega_k))
        assert_allclose(np.array(header.attrs['h']), np.array(cosmo.h))
        assert_allclose(np.array(header.attrs['Sigma8']),
                        np.array(cosmo.sigma8))
        assert_allclose(np.array(header.attrs['w0']), np.array(cosmo.w0))
        assert_allclose(np.array(header.attrs['wa']), np.array(cosmo.wa))
        assert_allclose(np.array(header.attrs['Time']), np.array(a0))

        # Testing recovery of data
        pos = bf['1/Position']
        assert_allclose(pos[:], state[0, 1].numpy() / nc[0] * boxsize[0])
        vel = bf['1/Velocity']
        assert_allclose(vel[:], state[1, 1].numpy() / nc[0] * boxsize[0])

        # Closing file
        bf.close()
Example #9
    def __init__(self, cachedir, aliases):
        import bigfile
        self.cachedir = cachedir

        with bigfile.BigFile(cachedir, create=True) as bf:
            bd = bigfile.BigData(bf)

            self._size = bd.size
            self._dtype = bd.dtype

        self.aliases = dict([(new, (old, transform))
                             for old, new, transform in aliases])
        ColumnStore.__init__(self)
Example #10
    def read_ptype(self, ptype, columns, full):
        f = bigfile.BigFile(self.path)
        done = False
        i = 0
        while not numpy.all(self.comm.allgather(done)):
            ret = []
            for column in columns:
                f = bigfile.BigFile(self.path)
                read_column = column
                if self.subsample:
                    if ptype in ("0", "1"):
                        read_column = read_column + '.sample'

                if ptype == 'FOFGroups':
                    if column == 'Position':
                        read_column = 'MassCenterPosition'
                    if column == 'Velocity':
                        read_column = 'MassCenterVelocity'

                cdata = f['%s/%s' % (ptype, read_column)]

                Ntot = cdata.size
                start = self.comm.rank * Ntot // self.comm.size
                end = (self.comm.rank + 1) * Ntot // self.comm.size
                if not full:
                    bunchstart = start + i * self.bunchsize
                    bunchend = start + (i + 1) * self.bunchsize
                    if bunchend > end: bunchend = end
                    if bunchstart > end: bunchstart = end
                else:
                    bunchstart = start
                    bunchend = end
                if bunchend == end:
                    done = True
                data = cdata[bunchstart:bunchend]
                ret.append(data)
            i = i + 1
            yield ret
Example #11
    def read(self, columns, start, stop, step=1):
        """
        Read the specified column(s) over the given range,
        as a dictionary

        'start' and 'stop' should be between 0 and :attr:`size`,
        which is the total size of the binary file (in particles)
        """
        import bigfile
        if isinstance(columns, string_types): columns = [columns]

        with bigfile.BigFile(filename=self.path)[self.dataset] as f:
            ds = bigfile.BigData(f, columns)
            return ds[start:stop][::step]
Example #12
def write_all_hdf_files(hdf5name, bfname):
    """Work out which particle set goes to which HDF5 file and write it."""
    bf = bigfile.BigFile(bfname, 'r')
    nfiles = compute_nfiles(bf["Header"].attrs["TotNumPart"])
    if not os.path.exists(hdf5name):
        os.mkdir(hdf5name)
    mm = re.search("PART_([0-9]*)", bfname)
    nsnap = '000'
    if mm is not None and len(mm.groups()) > 0:
        nsnap = mm.groups()[0]
    hdf5name = os.path.join(hdf5name, "snap_" + nsnap)
    print("Writing %d hdf snapshot files to %s" % (nfiles, hdf5name))
    for nn in range(nfiles):
        write_hdf_file(bf, hdf5name, nn, nfiles)
        print("Wrote file %d" % nn)
Example #13
def load_bigfile(filename, dataset):
    """
    Load a dataset from a bigfile using ``bigfile``, returning a ``numpy`` array

    Parameters
    ----------
    filename : str
        the path of the bigfile to open
    dataset : str
        the name of the bigfile dataset to load

    Returns
    -------
    array_like :
        the loaded data
    """
    import bigfile
    return bigfile.BigFile(filename)[dataset][...]
Example #14
    def build(self):

        files = self.listfiles()

        bf = bigfile.BigFile(self.destdir, create=True)

        fulldtype = fits.read_table(files[0]).dtype

        dtype = [(column, fulldtype[column]) for column in self.columns
                 if column != 'BRICK_PRIMARY']

        dtype.append(('BRICK_PRIMARY', '?'))
        dtype = numpy.dtype(dtype)

        sizes = []
        for filename in files:
            sizes.append(fits.size_table(filename))

        sizes = numpy.array(sizes)
        offsets = numpy.concatenate([[0], numpy.cumsum(sizes)])

        print("total number of objects:", sizes.sum())

        blocks = {}
        for column in self.columns:
            blocks[column] = bf.create(column,
                                       dtype=dtype[column],
                                       size=sizes.sum(),
                                       Nfile=1)

        for i, filename in enumerate(files):
            onefile = fits.read_table(filename)
            for column in self.columns:
                onedata = numpy.empty(len(onefile), dtype=dtype[column])

                if column != 'BRICK_PRIMARY':
                    onedata[...] = onefile[column]
                else:
                    try:
                        onedata[...] = onefile[column]
                    except:
                        onedata[...] = True
                blocks[column].write(offsets[i], onedata)
            print(filename, 'done')
Example #15
    def __init__(self, path, exclude=None, header=Automatic, dataset='./'):

        if not dataset.endswith('/'): dataset = dataset + '/'

        import bigfile

        self.dataset = dataset
        self.path = path

        # store the attributes
        self.attrs = {}

        # the file path
        with bigfile.BigFile(filename=path) as ff:
            columns = ff[self.dataset].blocks
            if header is Automatic:
                for header in ['Header', 'header', './']:
                    if header in columns: break

            if exclude is None:
                exclude = [header]

            columns = list(set(columns) - set(exclude))

            ds = bigfile.BigData(ff[self.dataset], columns)

            # set the data type and size
            self.dtype = ds.dtype
            self.size = ds.size

            header = ff[header]
            attrs = header.attrs

            # copy over the attrs
            for k in attrs.keys():

                # load a JSON representation if str starts with json://
                if isinstance(attrs[k],
                              string_types) and attrs[k].startswith('json://'):
                    self.attrs[k] = json.loads(attrs[k][7:], cls=JSONDecoder)
                # copy over an array
                else:
                    self.attrs[k] = numpy.array(attrs[k], copy=True)
Example #16
    def __init__(self, num, base, comm=None):
        self.comm = comm
        if self.comm is not None:
            self.rank = comm.Get_rank()
            self.size = comm.Get_size()
        else:
            self.size = 1
            self.rank = 0
        self.parts_rank = None
        fname = base
        snap = str(num).rjust(3, '0')
        new_fname = os.path.join(base, "PART_" + snap)
        #Check for snapshot directory
        if os.path.exists(new_fname):
            fname = new_fname
        self._f_handle = bigfile.BigFile(fname, 'r')
        if "Header" not in self._f_handle.blocks:
            raise IOError("No BigFile snapshot at %s" % new_fname)
        AbstractSnapshot.__init__(self)
Example #17
    def parallel_read(self, columns, full=False):

        f = bigfile.BigFile(self.path)
        header = f['header']
        boxsize = header.attrs['BoxSize'][0]

        ptypes = self.ptypes
        readcolumns = []
        for column in columns:
            if column == 'HI':
                if 'Mass' not in readcolumns:
                    readcolumns.append('Mass')
                if 'NeutralHydrogenFraction' not in readcolumns:
                    readcolumns.append('NeutralHydrogenFraction')
            else:
                readcolumns.append(column)

        readcolumns = readcolumns + self.load
        for ptype in ptypes:
            for data in self.read_ptype(ptype, readcolumns, full):
                P = dict(zip(readcolumns, data))
                if 'HI' in columns:
                    P['HI'] = P['NeutralHydrogenFraction'] * P['Mass']

                if 'Position' in columns:
                    P['Position'][:] *= self.BoxSize / boxsize
                    P['Position'][:] %= self.BoxSize

                if 'Velocity' in columns:
                    raise NotImplementedError

                if self.select is not None:
                    mask = self.select.get_mask(P)
                else:
                    mask = Ellipsis
                toret = []
                for column in columns:
                    d = P.get(column, None)
                    if d is not None:
                        d = d[mask]
                    toret.append(d)
                yield toret
Example #18
def get_snapshot_redshift_correspondence(output_path):
    output_file_names = os.listdir(output_path)
    #    print(output_file_names)
    #    print(output_path)
    snapshot_space = []
    redshift_space = []
    for name in output_file_names:
        if ('PIG' in name):
            try:
                snapshot_number = name[4:]
                snapshot_space.append(snapshot_number)
            except:
                print("Warning: Ignoring filename:%s" % name)
    snapshot_space = numpy.sort(numpy.array(snapshot_space))
    for snapshot_number in snapshot_space:
        pig = bigfile.BigFile(output_path + 'PIG_%s' % snapshot_number)
        header = pig.open('/Header')
        redshift = 1. / header.attrs['Time'][0] - 1
        redshift_space.append(redshift)
    return numpy.array(snapshot_space), numpy.array(redshift_space)
Example #19
def HMFFromFOF(foftable, h0=False, bins='auto'):
    """Print a conventionally normalised halo mass function from the FOF tables.
    Units returned are:
    dn/dM (M_sun/Mpc^3) (comoving) Note no little-h!
    If h0 == True, units are dn/dM (h^4 M_sun/Mpc^3)
    bins specifies the number of evenly spaced bins if an integer,
    or one of the strings understood by numpy.histogram."""
    bf = bigfile.BigFile(foftable)
    #1 solar in g
    msun_in_g = 1.989e33
    #1 Mpc in cm
    Mpc_in_cm = 3.085678e+24
    #In units of 10^10 M_sun by default.
    try:
        imass_in_g = bf["Header"].attrs["UnitMass_in_g"]
    except KeyError:
        imass_in_g = 1.989e43
    #Length in units of kpc/h by default
    try:
        ilength_in_cm = bf["Header"].attrs["UnitLength_in_cm"]
    except KeyError:
        ilength_in_cm = 3.085678e+21
    hub = bf["Header"].attrs["HubbleParam"]
    box = bf["Header"].attrs["BoxSize"]
    #Convert to Mpc from kpc/h:
    box *= ilength_in_cm / hub / Mpc_in_cm
    masses = bf["FOFGroups/Mass"][:]
    #This is N(M) evenly spaced in log(M)
    NM, Mbins = np.histogram(np.log10(masses), bins=bins)
    #Convert Mbins to Msun
    Mbins = 10**Mbins
    Mbins *= (imass_in_g / msun_in_g)
    #Divide by the bin width dM to get dN/dM (per Msun); dividing by volume below gives dn/dM
    dndm = NM / (Mbins[1:] - Mbins[:-1])
    Mcent = (Mbins[1:] + Mbins[:-1]) / 2.
    #Now divide by the volume:
    dndm /= box**3
    if h0:
        dndm /= hub**4
    return Mcent, dndm
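A brief usage sketch (the FOF table path is a placeholder): tabulate the mass function and save it to a text file.

import numpy as np

# Hypothetical path to a PIG/FOF catalogue containing FOFGroups/Mass.
mcent, dndm = HMFFromFOF("output/PIG_005", h0=False, bins=50)
np.savetxt("hmf.txt", np.vstack([mcent, dndm]).T,
           header="M (Msun)   dn/dM (Msun^-1 Mpc^-3, comoving)")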
Example #20
def temporary_data():

    import bigfile
    try:
        data = numpy.empty(1024,
                           dtype=[('Position', ('f8', 3)),
                                  ('Velocity', ('f8', 3))])
        data['Position'] = numpy.random.random(size=(1024, 3))
        data['Velocity'] = numpy.random.random(size=(1024, 3))

        tmpdir = tempfile.mkdtemp()
        with bigfile.BigFile(tmpdir, create=True) as tmpff:
            with tmpff.create("Position", dtype=('f4', 3), size=1024) as bb:
                bb.write(0, data['Position'])
            with tmpff.create("Velocity", dtype=('f4', 3), size=1024) as bb:
                bb.write(0, data['Velocity'])
            with tmpff.create("Header") as bb:
                bb.attrs['Size'] = 1024.

        yield (data, tmpdir)
    except:
        raise
    finally:
        shutil.rmtree(tmpdir)
Example #21
def get_group_property(output_path, group_property, desired_redshift):
    output_redshift, output_snapshot = desired_redshift_to_output_redshift(
        output_path, desired_redshift)
    pig = bigfile.BigFile(output_path + 'PIG_%s' % output_snapshot)
    return pig.open('FOFGroups/%s' % (group_property))[:], output_redshift
Example #22
def threshold(snapshot, a0=0, dim=3):
    """Calculates KDE for halo positions, retains those in regions with KDE
    greater than the mean and saves into a file. Also saves a file with the
    smoothing length used and the boxsize (for use with scms algorithm).

    Inputs are PIG snapshot path (from run directory),
    smoothing length factor (0 means use default smoothing of 2 Mpc),
    and number of dimensions (defaults to 3)"""

    #--------------- Define function to handle the periodicity of the box
    # the input array should be separations between objects
    def perio(sep, boxsize):
        # if the distance between particles is greater than half the boxsize,
        # change to the periodic distance (i.e. "off the map")
        sep[np.where(
            sep > boxsize / 2.)] = -boxsize + sep[np.where(sep > boxsize / 2.)]
        # if the distance between particles is less than half the negative boxsize,
        # change to the periodic distance (i.e. "off the map")
        sep[np.where(sep < -boxsize /
                     2.)] = boxsize + sep[np.where(sep < -boxsize / 2.)]
        return (sep)

    #--------------- Import the snapshot parameters and make halo position arrays
    pig = bigfile.BigFile(snapshot)
    x = pig["FOFGroups/MassCenterPosition"][:][:, 0]  # in kpc
    y = pig["FOFGroups/MassCenterPosition"][:][:, 1]
    z = pig["FOFGroups/MassCenterPosition"][:][:, 2]
    n_halos = x.size  # number of halos
    boxsize = pig["Header"].attrs["BoxSize"][0]  # boxsize in kpc
    print("Number of Halos:", n_halos)

    #--------------- Calculate the (Gaussian) kernel density estimator
    start = time.time()
    if a0 != 0:
        sig = np.min((np.std(x), np.std(y), np.std(z)))
        # calculated smoothing length
        smoothing = a0 / ((dim + 2)**(1. /
                                      (dim + 4))) * n_halos**(-1. /
                                                              (dim + 4)) * sig
    else:
        smoothing = 2000.  # good generic value for 64 Mpc box
    kde = np.zeros(n_halos)
    for i in range(n_halos):
        # distance between ith halo and all halos (squared)
        dist = perio(x[i] - x, boxsize)**2 + perio(
            y[i] - y, boxsize)**2 + perio(z[i] - z, boxsize)**2
        kde[i] = np.sum(np.exp(-dist / (2. * smoothing**2)))
    end = time.time()
    print("Time for KDE calculation: ", np.round((end - start) / 60, 3),
          "minutes")

    #--------------- Cut out particles in the lowest density regions
    tau = np.mean(kde)  # cutoff threshold for density
    cuts = np.where(kde > tau)[0]  # indices for positions that meet criteria
    n_halos = cuts.size
    print('% Halos Remaining:', np.round(100. * n_halos / x.size, 1))

    #--------------- Save retained halo positions and masses into a file
    halos = np.array([x[cuts], y[cuts], z[cuts]])
    masses = pig["FOFGroups/Mass"][:][cuts]

    df1 = pd.DataFrame({'x': halos[0], 'y': halos[1], 'z': halos[2]})
    df2 = pd.DataFrame({'mass': masses})
    dff = pd.concat([df1, df2], ignore_index=False, axis=1)
    dff.to_csv(snapshot + 'threshold.out', sep=',', na_rep=-1, index=False)

    #--------------- Save smoothing length and boxsize into file
    np.savetxt(snapshot + 'params.out', np.array([smoothing, boxsize]))
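A hedged usage sketch (the PIG path is a placeholder): run the density cut once with the default fixed smoothing and once with the adaptive rule.

# Hypothetical PIG catalogue path; threshold() writes its *.out files
# relative to this path, as in the function above.
threshold("output/PIG_005/", a0=0, dim=3)    # default fixed 2000 kpc smoothing
threshold("output/PIG_005/", a0=1.0, dim=3)  # adaptive smoothing scaled by a0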
Example #23
def load_snapshot_header(output_path, desired_redshift):
    output_redshift, snapshot_number = desired_redshift_to_output_redshift(
        output_path, desired_redshift)
    pig = bigfile.BigFile(output_path + 'PIG_%s' % snapshot_number)
    header = pig.open('/Header')
    return header
Example #24
        return r, DD
    else:
        return None, None


if (z == 8):
    pig = '086'
if (z == 9):
    pig = '066'
if (z == 10):
    pig = '054'
if (z == 7.5):
    pig = '141'

galaxy = bigfile.BigFile(
    '/nfs/nas-0-1/akbhowmi/my_galaxy_sample_bluetides_closest_comoving_distance_with_luminosity/'
    + pig + '/')
SM = galaxy.open('galaxy_stellar_mass')[:]
HM = galaxy.open('galaxy_host_mass')[:]
hostid = galaxy.open('galaxy_host_id')[:]
tag = galaxy.open('central_satellite_tag')[:]
galaxy_positions = galaxy.open('galaxy_center_of_mass')[:]

lcuts = numpy.array([7.5, 8.0, 8.5, 9.0, 9.5, 10.0, 10.5])
lcuts += 0.25

for lcut in reversed(lcuts):
    mask = SM > 10**lcut
    data_s = galaxy_positions[mask]

    N = len(data_s)
Example #25
def get_particle_property(output_path, particle_property, p_type,
                          desired_redshift):
    output_redshift, output_snapshot = desired_redshift_to_output_redshift(
        output_path, desired_redshift)
    pig = bigfile.BigFile(output_path + 'PIG_%s' % output_snapshot)
    return pig.open('%d/%s' % (p_type, particle_property))[:], output_redshift
Example #26
    def fetch(self, column, start, end):
        import bigfile
        with bigfile.BigFile(self.cachedir) as bf:
            return bf[column][start:end]