示例#1
0
def main(ns):
    """Copy the particle columns of a bigfile and add a constant mass column.

    The blocks ``1/Position``, ``1/Velocity`` and ``1/ID`` are read from
    ``ns.source`` and written to a newly created bigfile at ``ns.dest`` with
    the same dtype, size and Nfile. A ``1/Mass`` block (dtype ``f4``) sized
    like ``1/Position`` is then filled with ``MassTable[1]`` taken from the
    source ``Header`` attributes, and finally every ``Header`` attribute is
    copied to a new ``Header`` block in the destination.
    """
    def transfer(src, dst, chunksize=1024*1024):
        # Move the data in fixed-size pieces instead of one large read.
        offset = 0
        while offset < src.size:
            dst.write(offset, src[offset:offset + chunksize])
            offset += chunksize

    with bigfile.File(ns.source) as bfi, \
         bigfile.File(ns.dest, create=True) as bfo:
        header = bfi['Header']

        for column in ('1/Position', '1/Velocity', '1/ID'):
            with bfi[column] as src:
                with bfo.create(column, dtype=src.dtype, size=src.size, Nfile=src.Nfile) as dst:
                    transfer(src, dst)
                if column == '1/Position':
                    # The mass column below reuses the layout of Position.
                    npart = src.size
                    Nfile = src.Nfile

        with bfo.create('1/Mass', dtype='f4', size=npart, Nfile=Nfile) as dst:
            # A broadcast view gives every particle the same mass without
            # allocating npart values.
            uniform_mass = numpy.broadcast_to(header.attrs['MassTable'][1], npart)
            transfer(uniform_mass, dst)

        with bfo.create('Header') as dst:
            for key in header.attrs:
                dst.attrs[key] = header.attrs[key]
def load_field_chunk_bigfile(field_name,
                             dens_dir,
                             R_smooth,
                             n_gr,
                             i_chunk,
                             n_chunks,
                             Lbox,
                             padding=40.):
    """Load a padded slab of a gridded field stored in a bigfile.

    The ``Field`` block is read from ``density_<n_gr>.bigfile`` when
    ``field_name`` is ``'delta'`` and ``int(R_smooth)`` is 0, and from
    ``<field_name>_<R_smooth>.bigfile`` otherwise; both names are appended
    to ``dens_dir`` by plain string concatenation. The block is treated as a
    flattened ``n_gr**3`` grid and chunk ``i_chunk`` of ``n_chunks`` is cut
    along the first axis, extended by ``padding`` on both sides (``padding``
    is in the same length units as ``Lbox``). Slabs crossing the box edge
    wrap around periodically.

    Returns
    -------
    field_chunk : array of shape ``(depth, n_gr, n_gr)``
    start, end : coordinates computed from the first and last grid index
        of the slab, taken modulo ``Lbox``
    """
    unsmoothed_density = field_name == 'delta' and int(R_smooth) == 0
    if unsmoothed_density:
        path = dens_dir + "density_%d.bigfile" % n_gr
    else:
        path = dens_dir + field_name + "_%d.bigfile" % R_smooth
    data = bigfile.File(path)['Field']

    n_side = int(np.round(data.size**(1. / 3)))
    assert n_gr == n_side, "Data is incompatible with the input"

    grid_size = Lbox / n_gr
    chunk_size = Lbox / n_chunks
    assert grid_size < chunk_size, "The chunk size must be larger than the cell size"

    # First and last grid index of the padded slab (may fall outside the box).
    lower_edge = i_chunk * chunk_size - padding
    upper_edge = (i_chunk + 1) * chunk_size + padding
    i1, i2 = (np.array([lower_edge, upper_edge]) // grid_size).astype(int)
    gr_depth = i2 - i1

    # Fold the indices back into the periodic box.
    i1 = i1 % n_gr
    i2 = i2 % n_gr

    # Coordinates in the box of the loaded field.
    start = (i1 * grid_size) % Lbox
    end = ((i2 + 1) * grid_size) % Lbox

    # Offsets into the flattened array: one first-axis step is n_gr**2 cells.
    lo = i1 * n_gr**2
    hi = i2 * n_gr**2

    if lo > hi:
        # The slab wraps: join the end of the array with its beginning.
        tail = data[lo:]
        head = data[:hi]
        n_tail = len(tail)

        field_chunk = np.zeros(n_tail + len(head), dtype=np.float32)
        field_chunk[:n_tail] = tail
        field_chunk[n_tail:] = head
        del tail, head
        return field_chunk.reshape((gr_depth, n_gr, n_gr)), start, end

    return data[lo:hi].reshape((gr_depth, n_gr, n_gr)), start, end
示例#3
0
    def read(self, columns, start, stop, step=1):
        """
        Read the specified column(s) over the given range.

        The file at :attr:`path` is opened, a ``bigfile.Dataset`` is built
        over ``columns`` inside :attr:`dataset`, and the rows
        ``[start:stop]`` are read and then subsampled with ``[::step]``.

        'start' and 'stop' should be between 0 and :attr:`size`,
        which is the total size of the binary file (in particles)
        """
        import bigfile

        # Open the root file in its own context so it is closed explicitly
        # on exit, rather than left as an unnamed temporary; the dataset
        # sub-file is still closed by the inner context.
        with bigfile.File(filename=self.path) as ff:
            with ff[self.dataset] as f:
                ds = bigfile.Dataset(f, columns)
                return ds[start:stop][::step]
示例#4
0
    def __init__(self, path, exclude=None, header=Automatic, dataset='./'):
        """
        Inspect the bigfile at ``path`` and record its columns and attributes.

        Parameters
        ----------
        path : str
            location of the bigfile
        exclude : str or list of str, optional
            fnmatch-style pattern(s) of block names to leave out; when
            ``None``, the header blocks found by ``_find_headers`` are
            excluded
        header : optional
            passed through to ``_find_headers`` to locate the header blocks
        dataset : str
            sub-file holding the columns; a trailing ``/`` is appended when
            missing
        """
        if not dataset.endswith('/'):
            dataset += '/'

        import bigfile

        self.dataset = dataset
        self.path = path

        # attributes collected from the header blocks
        self.attrs = {}

        with bigfile.File(filename=path) as ff:
            available = list(ff[self.dataset].blocks)
            header_names = self._find_headers(header, dataset, ff)

            # by default only the headers are excluded
            if exclude is None:
                exclude = header_names
            if not isinstance(exclude, (list, tuple)):
                exclude = [exclude]

            def is_excluded(name):
                return any(fnmatch(name, pattern) for pattern in exclude)

            columns = [name for name in set(available) if not is_excluded(name)]

            ds = bigfile.Dataset(ff[self.dataset], columns)

            # data type and size come from the combined dataset
            self.dtype = ds.dtype
            self.size = ds.size

            header_blocks = [ff[name] for name in header_names]
            for attrs in [block.attrs for block in header_blocks]:
                for k in attrs.keys():
                    value = attrs[k]
                    is_json = (isinstance(value, string_types)
                               and value.startswith('json://'))
                    if is_json:
                        # strings prefixed with json:// hold a JSON payload
                        self.attrs[k] = json.loads(value[7:], cls=JSONDecoder)
                    else:
                        # anything else is copied over as an array
                        self.attrs[k] = numpy.array(value, copy=True)
示例#5
0
def main(ns):
    """Read a bigfile snapshot and hand its particle data to ``convert``.

    The ``Position``, ``Velocity`` and ``ID`` columns of dataset ``1/`` in
    ``ns.source`` are combined into one ``bigfile.Dataset``. The ``Header``
    attributes are printed and turned into a header via
    ``make_gadget_header``. The directory that will contain ``ns.dest`` is
    created when missing, and ``convert`` is called with the destination,
    the header, the dataset and ``ns.precision``.
    """
    # Keep the source open for the whole conversion, since ``ds`` reads
    # from it, and close it deterministically afterwards.
    with bigfile.File(ns.source) as f:
        ds = bigfile.Dataset(f['1/'], ['Position', 'Velocity', 'ID'])

        header = f['Header']
        # Interpolate the file name; passing it as a second print argument
        # would emit a literal "%s".
        print("---input file : %s -----" % ns.source)
        for key in header.attrs:
            print(key, header.attrs[key])

        gadget_header = make_gadget_header(header)

        dirname = os.path.dirname(os.path.abspath(ns.dest))
        if not os.path.exists(dirname):
            print("making dir")
            os.makedirs(dirname)

        convert(ns.dest, gadget_header, ds, ns.precision)
示例#6
0
def gen_comp(zs):
    """Compute and cache the power spectrum of one stored kappa map.

    Opens ``<folder>wlen_jliu/WL-<zs>-N4096``, reads ``nside`` and ``zs``
    from the attributes of its ``kappa`` block, and prints them together
    with the shape of the map. When ``<folder>/clkk/kappa_cl_z<zs>.npz``
    does not exist yet, the spectrum is computed with ``hp.anafast`` up to
    ``lmax = min(5000, nside)`` and saved there with the matching ``ell``
    array.

    Parameters
    ----------
    zs : float
        source redshift used to build the input file name; it is then
        replaced by the ``zs`` attribute stored in the file
    """
    with bigfile.File(folder + 'wlen_jliu/WL-%.2f-N4096' % (zs)) as f:
        kappa_block = f['kappa']

        nside = kappa_block.attrs['nside'][0]
        zs = kappa_block.attrs['zs'][0]

        print('nside = ', nside)
        print('redshifts = ', zs)

        lmax = min([5000, nside])  #5000
        ell_sim = np.arange(lmax + 1)

        # Read the map once and reuse it for both the shape report and the
        # spectrum, instead of loading it from disk twice.
        kappa = kappa_block[:]
        print(kappa.shape)

        fn_cl = folder + '/clkk/kappa_cl_z%.2f.npz' % (zs)
        if not os.path.isfile(fn_cl):
            cl = hp.anafast(kappa, lmax=lmax)
            np.savez(fn_cl, ell=ell_sim, cl=cl)
示例#7
0
def main(ns):
    """Convert particle type 1 of a Gadget-1 snapshot into a bigfile.

    The catalog is loaded from ``ns.source``, its attributes are replaced
    by a renamed set (``MassTable``, ``TotNumPart``, ``BoxSize``, ...), a
    ``Velocity`` column is derived from ``GadgetVelocity``, the catalog is
    optionally subsampled with stride ``ns.subsample``, and ``Position``,
    ``Velocity`` and ``ID`` are saved to dataset ``1`` of ``ns.dest``.
    Afterwards a block ``1`` is created in the output carrying the
    attributes ``a.x``, ``a.v`` and ``M0``.

    ``ns.time_ic`` is filled in with the snapshot ``Time`` when it is None.
    """
    cat = Gadget1Catalog(ns.source, ptype=1)

    # Keep the original header values, then start from an empty set.
    attrs = cat.attrs.copy()
    cat.attrs.clear()

    def total_particles():
        # Nall holds the low 32 bits of the counts, NallHW the high 32 bits.
        low = numpy.int64(attrs['Nall'])
        high = numpy.int64(attrs['NallHW'])
        return low + (high << 32)

    a = attrs['Time']

    cat.attrs['MassTable'] = attrs['Massarr']
    cat.attrs['TotNumPart'] = total_particles()
    cat.attrs['TotNumPartInit'] = total_particles()
    cat.attrs['BoxSize'] = attrs['BoxSize']
    cat.attrs['Time'] = a
    cat.attrs['ScalingFactor'] = a

    if ns.time_ic is None:
        ns.time_ic = a
    cat.attrs['TimeIC'] = ns.time_ic

    cat.attrs['UnitVelocity_in_cm_per_s'] = 1e5
    # Any other unit system leaves UnitLength_in_cm unset.
    for unit_name, length_in_cm in (('Mpc', 3.085678e24), ('Kpc', 3.085678e21)):
        if ns.unit_system == unit_name:
            cat.attrs['UnitLength_in_cm'] = length_in_cm

    cat.attrs['UnitMass_in_g'] = 1.989e43

    # The velocity convention is weird without this
    cat.attrs['UsePeculiarVelocity'] = True

    velocity_factor = a ** 0.5
    cat['Velocity'] = cat['GadgetVelocity'] * velocity_factor

    if ns.subsample is not None:
        cat = cat[::ns.subsample]

    cat.save(ns.dest, columns=['Position', 'Velocity', 'ID'], dataset='1', header='Header')

    with bigfile.File(ns.dest) as bf, bf.create("1") as bb:
        bb.attrs['a.x'] = a
        bb.attrs['a.v'] = a
        bb.attrs['M0'] = attrs['Massarr'][1]
def main(ns):
    """Accumulate kappa maps over redshift shells and write one file per source plane.

    The range ``[ns.zlmin, zlmax]`` is split into ``Nsteps`` redshift bins
    of roughly ``ns.zstep`` (at least two). For each bin the particles
    returned by ``read_range`` (called with the scale factors ``1/(1+z)`` of
    the bin edges) are passed to ``make_kappa_maps``; its three outputs are
    summed over bins, and the per-bin kappa output is also kept in
    ``kappa_all``. For each source redshift in ``ns.zs`` the local pieces
    are then gathered with ``GatherArray`` and rank 0 writes the datasets
    ``kappa`` and ``Nm`` to ``<ns.output>/WL-<zs>-N<nside>`` together with
    the run parameters as attributes.

    This function is collective: every rank of ``cat.comm`` must call it.
    ``ns.zlmax`` is set to ``max(ns.zs)`` when it is None.
    """
    import gc

    if ns.zlmax is None:
        ns.zlmax = max(ns.zs)

    zs_list = ns.zs

    zlmin = ns.zlmin
    # NOTE: the upper lens redshift is hard-wired to the last source
    # redshift; ns.zlmax is not used for the binning here.
    zlmax = zs_list[-1]

    # no need to be accurate here
    ds_list = Planck15.comoving_distance(zs_list)

    path = ns.source

    cat = BigFileCatalog(path, dataset=ns.dataset)
    comm = cat.comm

    kappa = 0
    Nm = 0
    kappabar = 0

    npix = healpix.nside2npix(ns.nside)
    # number of pixels handled by this rank
    localsize = npix * (comm.rank + 1) // comm.size - npix * (comm.rank) // comm.size
    nbar = (cat.attrs['NC'] ** 3  / cat.attrs['BoxSize'] ** 3 * cat.attrs['ParticleFraction'])[0]

    Nsteps = int(numpy.round((zlmax - zlmin) / ns.zstep))
    if Nsteps < 2:
        Nsteps = 2

    # Nsteps + 1 edges, from high to low redshift, delimit Nsteps bins.
    z = numpy.linspace(zlmax, zlmin, Nsteps+1, endpoint=True)

    if comm.rank == 0:
        cat.logger.info("Splitting data redshift bins %s" % str(z))

    # per-bin maps; bins without particles stay zero
    kappa_all = numpy.zeros((Nsteps, len(zs_list), localsize))
    for i, (z1, z2) in enumerate(zip(z[:-1], z[1:])):
        gc.collect()
        if comm.rank == 0:
            cat.logger.info("nbar = %g, zlmin = %g, zlmax = %g zs = %s" % (nbar, z2, z1, zs_list))

        shell = read_range(cat, 1/(1 + z1), 1 / (1 + z2))

        if shell.csize == 0:
            continue
        if comm.rank == 0:
            cat.logger.info("read %d particles" % shell.csize)

        kappa1, kappa1bar, Nm1 = make_kappa_maps(shell, ns.nside, zs_list, ds_list, localsize, nbar)

        kappa = kappa + kappa1
        kappa_all[i] = kappa1
        Nm = Nm + Nm1
        kappabar = kappabar + kappa1bar

    comm.barrier()

    if comm.rank == 0:
        # use bigfile because it allows concurrent write to different datasets.
        cat.logger.info("writing to %s", ns.output)

    # array to get all map slices (only needed where the data is gathered)
    if comm.rank == 0:
        kappa1_all = numpy.zeros((Nsteps, int(12*ns.nside**2)))

    for i, (zs, ds) in enumerate(zip(zs_list, ds_list)):
        # One collective call is enough for both statistics.
        local_sizes = comm.allgather(len(kappa[i]))
        std = numpy.std(local_sizes)
        mean = numpy.mean(local_sizes)
        if comm.rank == 0:
            cat.logger.info("started gathering source plane %s, size-var = %g, size-bar = %g" % (zs, std, mean))

        kappa1 = GatherArray(kappa[i], comm)
        Nm1 = GatherArray(Nm[i], comm)

        # get slices of kappa map
        # NOTE(review): the gathered slices are only used by the DEBUG print
        # below; the "kappa_all" dataset is not written.
        for j in range(Nsteps):
            kappa1_allj = GatherArray(kappa_all[j,i], comm)
            if comm.rank == 0:
                kappa1_all[j] = kappa1_allj

        if comm.rank == 0:
            cat.logger.info("done gathering source plane %s" % zs)

            fname = ns.output + "/WL-%02.2f-N%04d" % (zs, ns.nside)
            cat.logger.info("started writing source plane %s" % zs)

            with bigfile.File(fname, create=True) as ff:
                print('DEBUG', kappa1_all.shape, len(kappa1_all), numpy.dtype((kappa1_all.dtype, kappa1_all.shape[1:])))
                ds1 = ff.create_from_array("kappa", kappa1, Nfile=1)
                ds2 = ff.create_from_array("Nm", Nm1, Nfile=1)

                for d in ds1, ds2:
                    d.attrs['kappabar'] = kappabar[i]
                    d.attrs['nside'] = ns.nside
                    d.attrs['zlmin'] = zlmin
                    d.attrs['zlmax'] = zlmax
                    d.attrs['zstep'] = ns.zstep
                    d.attrs['zs'] = zs
                    d.attrs['ds'] = ds
                    d.attrs['nbar'] = nbar

        comm.barrier()
        if comm.rank == 0:
            # use bigfile because it allows concurrent write to different datasets.
            cat.logger.info("source plane at %g written. " % zs)
示例#9
0
def main(ns):
    """Accumulate kappa maps over redshift shells and write one file per source plane.

    The range ``[ns.zlmin, ns.zlmax]`` is split into redshift bins using
    ``Nsteps`` linearly spaced edges, where ``Nsteps`` is the rounded ratio
    of the range to ``ns.zstep`` (at least two). For each bin the particles
    returned by ``read_range`` (called with the scale factors ``1/(1+z)`` of
    the bin edges) are passed to ``make_kappa_maps`` and its three outputs
    are summed over bins. For each source redshift in ``ns.zs`` the local
    pieces are then gathered with ``GatherArray`` and rank 0 writes the
    datasets ``kappa`` and ``Nm`` to ``<ns.output>/WL-<zs>-N<nside>``
    together with the run parameters as attributes.

    This function is collective: every rank of ``cat.comm`` must call it.
    ``ns.zlmax`` is set to ``max(ns.zs)`` when it is None.
    """
    import gc

    if ns.zlmax is None:
        ns.zlmax = max(ns.zs)

    zs_list = ns.zs

    zlmin = ns.zlmin
    zlmax = ns.zlmax

    # no need to be accurate here
    ds_list = Planck15.comoving_distance(zs_list)

    path = ns.source

    cat = BigFileCatalog(path, dataset=ns.dataset)
    comm = cat.comm

    kappa = 0
    Nm = 0
    kappabar = 0

    npix = healpix.nside2npix(ns.nside)
    # number of pixels handled by this rank
    localsize = npix * (comm.rank + 1) // comm.size - npix * (
        comm.rank) // comm.size
    nbar = (cat.attrs['NC']**3 / cat.attrs['BoxSize']**3 *
            cat.attrs['ParticleFraction'])[0]

    Nsteps = int(numpy.round((zlmax - zlmin) / ns.zstep))
    if Nsteps < 2:
        Nsteps = 2
    # bin edges, from high to low redshift
    z = numpy.linspace(zlmax, zlmin, Nsteps, endpoint=True)

    if comm.rank == 0:
        cat.logger.info("Splitting data redshift bins %s" % str(z))

    for z1, z2 in zip(z[:-1], z[1:]):
        gc.collect()
        if comm.rank == 0:
            cat.logger.info("nbar = %g, zlmin = %g, zlmax = %g zs = %s" %
                            (nbar, z2, z1, zs_list))

        shell = read_range(cat, 1 / (1 + z1), 1 / (1 + z2))

        if shell.csize == 0:
            continue
        if comm.rank == 0:
            cat.logger.info("read %d particles" % shell.csize)

        kappa1, kappa1bar, Nm1 = make_kappa_maps(shell, ns.nside, zs_list,
                                                 ds_list, localsize, nbar)

        kappa = kappa + kappa1
        Nm = Nm + Nm1
        kappabar = kappabar + kappa1bar

    comm.barrier()

    if comm.rank == 0:
        # use bigfile because it allows concurrent write to different datasets.
        cat.logger.info("writing to %s", ns.output)

    for i, (zs, ds) in enumerate(zip(zs_list, ds_list)):
        # One collective call is enough for both statistics.
        local_sizes = comm.allgather(len(kappa[i]))
        std = numpy.std(local_sizes)
        mean = numpy.mean(local_sizes)
        if comm.rank == 0:
            cat.logger.info(
                "started gathering source plane %s, size-var = %g, size-bar = %g"
                % (zs, std, mean))

        kappa1 = GatherArray(kappa[i], comm)
        Nm1 = GatherArray(Nm[i], comm)

        if comm.rank == 0:
            cat.logger.info("done gathering source plane %s" % zs)

            fname = ns.output + "/WL-%02.2f-N%04d" % (zs, ns.nside)
            cat.logger.info("started writing source plane %s" % zs)

            with bigfile.File(fname, create=True) as ff:

                ds1 = ff.create_from_array("kappa", kappa1, Nfile=1)
                ds2 = ff.create_from_array("Nm", Nm1, Nfile=1)

                for d in ds1, ds2:
                    d.attrs['kappabar'] = kappabar[i]
                    d.attrs['nside'] = ns.nside
                    d.attrs['zlmin'] = zlmin
                    d.attrs['zlmax'] = zlmax
                    d.attrs['zs'] = zs
                    d.attrs['ds'] = ds
                    d.attrs['nbar'] = nbar

        comm.barrier()
        if comm.rank == 0:
            # use bigfile because it allows concurrent write to different datasets.
            cat.logger.info("source plane at %g written. " % zs)