Exemplo n.º 1
0
    def __init__(self, path, bunchsize=4 * 1024 * 1024):
        """
        Remember where the halo label files live and count their particles.

        Parameters
        ----------
        path : object
            location handed to ``files.DataStorage``; stored as ``self.path``
        bunchsize : int, optional
            stored as ``self.bunchsize``; defaults to ``4 * 1024 * 1024``

        Notes
        -----
        ``self.comm`` must already exist when this runs (it is not set here;
        presumably an MPI-style communicator supplied by the class -- confirm).
        """
        self.path, self.bunchsize = path, bunchsize

        # Only rank 0 builds the storage object; every other rank contributes
        # a placeholder and receives the broadcast copy.
        storage = None
        if self.comm.rank == 0:
            storage = files.DataStorage(self.path, files.HaloLabelFile)
        storage = self.comm.bcast(storage)

        # total size is the sum of the per-entry counts reported by the storage
        self.size = sum(storage.npart)
Exemplo n.º 2
0
    def parallel_read(self, columns, full=False):
        """
        Read columns of a Gadget snapshot in parallel, bunch by bunch.

        This is a generator. For every particle type in ``self.ptype`` and
        every bunch produced by the storage iterator it yields a list with one
        entry per name in `columns`, in the order of `columns`. A column the
        storage did not return is reported as ``None``.

        Parameters
        ----------
        columns : sequence of str
            names of the columns to read. `Position` is divided by the
            ``boxsize`` found in the header of snapshot file 0 and multiplied
            by ``self.BoxSize``. `Velocity` is not supported yet.
        full : bool, optional
            if True, neglect ``self.bunchsize`` and request a bunchsize of -1;
            only allowed when a single particle type is selected.

        Raises
        ------
        ValueError
            if `full` is True and ``self.ptype`` holds more than one entry.
        KeyError
            if `Velocity` is requested, or if ``self.rsd`` is set (the
            redshift-space shift needs `Velocity`, which is unsupported).
        """
        bunchsize = -1 if full else self.bunchsize

        if full and len(self.ptype) > 1:
            raise ValueError("cannot read multple ptype in a full load")

        unique_columns = set(columns)

        # Velocity cannot be read yet, and an RSD shift would require it.
        # Fail before any file is opened or any broadcast happens instead of
        # after the first bunch has already been read.
        if self.rsd is not None or 'Velocity' in unique_columns:
            raise KeyError('Velocity is not yet supported')

        for ptype in self.ptype:
            args = dict(ptype=ptype,
                        posdtype=self.posdtype,
                        veldtype=self.veldtype,
                        massdtype=self.massdtype,
                        iddtype=self.iddtype)

            # only rank 0 touches the files; the other ranks receive the
            # header box size and the storage object through the broadcasts
            if self.comm.rank == 0:
                datastorage = files.DataStorage(self.path,
                                                files.GadgetSnapshotFile, args)
                f0 = files.GadgetSnapshotFile(self.path, 0, args)
                boxsize = f0.header['boxsize']
            else:
                datastorage = None
                boxsize = None
            boxsize = self.comm.bcast(boxsize)
            datastorage = self.comm.bcast(datastorage)

            for bunch in datastorage.iter(comm=self.comm,
                                          columns=unique_columns,
                                          bunchsize=bunchsize):
                # pair every returned array with its column name; the same
                # set object is iterated for the request and for the pairing
                P = dict(zip(unique_columns, bunch))
                if 'Position' in P:
                    # rescale from the snapshot's box size to self.BoxSize
                    P['Position'] /= boxsize
                    P['Position'] *= self.BoxSize

                yield [P.get(key, None) for key in columns]
Exemplo n.º 3
0
    def __init__(self, path, BoxSize, rsd=None, bunchsize=4 * 1024 * 1024):
        """
        Store the reader settings and determine the total particle count.

        Parameters
        ----------
        path : object
            location handed to ``files.DataStorage``; stored as ``self.path``
        BoxSize : object
            stored as ``self.BoxSize``
        rsd : object, optional
            stored as ``self.rsd``; ``None`` by default
        bunchsize : int, optional
            stored as ``self.bunchsize``; defaults to ``4 * 1024 * 1024``

        Notes
        -----
        ``self.comm`` must already exist when this runs (it is not set here;
        presumably an MPI-style communicator supplied by the class -- confirm).
        """
        self.path = path
        self.BoxSize = BoxSize
        self.rsd = rsd
        self.bunchsize = bunchsize

        # Rank 0 alone inspects the snapshot files and sums the per-entry
        # counts; the resulting number is what gets broadcast, not the
        # storage object itself.
        total = None
        if self.comm.rank == 0:
            storage = files.DataStorage(self.path, files.TPMSnapshotFile)
            total = sum(storage.npart)
        self.size = self.comm.bcast(total)
Exemplo n.º 4
0
    def parallel_read(self, columns, full=False):
        """
        Read columns of a TPM snapshot in parallel, bunch by bunch.

        This is a generator. For every bunch produced by the storage iterator
        it yields a list with one entry per name in `columns`, in the order
        of `columns`.

        This supports `Position`, `Velocity` columns. `Mass` and `Weight`
        are never requested from the storage and are reported as ``None``,
        as is any other column the storage did not return.

        Parameters
        ----------
        columns : sequence of str
            names of the columns to read
        full : bool, optional
            if True, neglect ``self.bunchsize`` and request a bunchsize of -1

        Notes
        -----
        When ``self.rsd`` (one of 'x', 'y', 'z') is set and `Position` is
        requested, `Velocity` is read as well, added to `Position` along that
        axis, and the result is wrapped into ``self.BoxSize``. When
        `Position` is not requested there is nothing to shift, so no extra
        `Velocity` read is triggered and no shift is attempted.
        """
        bunchsize = -1 if full else self.bunchsize

        newcolumns = set(columns)

        # The RSD shift modifies Position, so it only applies (and Velocity
        # only has to be read for it) when Position is requested. Without
        # this guard a request lacking Position failed with a KeyError.
        apply_rsd = self.rsd is not None and 'Position' in newcolumns
        if apply_rsd:
            newcolumns.add('Velocity')
            axis = "xyz".index(self.rsd)

        # these two are not requested from the storage
        newcolumns.discard('Mass')
        newcolumns.discard('Weight')

        # only rank 0 builds the storage object; the others receive a copy
        if self.comm.rank == 0:
            datastorage = files.DataStorage(self.path, files.TPMSnapshotFile)
        else:
            datastorage = None
        datastorage = self.comm.bcast(datastorage)

        for bunch in datastorage.iter(comm=self.comm,
                                      columns=newcolumns,
                                      bunchsize=bunchsize):
            # pair every returned array with its column name; the same set
            # object is iterated for the request and for the pairing
            P = dict(zip(newcolumns, bunch))

            # both columns are scaled in place by the box size (presumably
            # they are stored in units of the box -- confirm)
            if 'Position' in P:
                P['Position'] *= self.BoxSize
            if 'Velocity' in P:
                P['Velocity'] *= self.BoxSize

            if apply_rsd:
                P['Position'][:, axis] += P['Velocity'][:, axis]
                P['Position'][:, axis] %= self.BoxSize[axis]

            yield [P.get(key, None) for key in columns]
Exemplo n.º 5
0
    def read(self, columns, full=False):
        """
        Read columns of a Gadget group tab catalogue in parallel.

        This is a generator. For every bunch produced by the storage iterator
        it yields a list with one entry per name in `columns`, in the order
        of `columns`.

        Parameters
        ----------
        columns : sequence of str
            names of the columns to read. `Position` is divided in place by
            ``self.mpch``. `Velocity` is not supported yet.
        full : bool, optional
            if True, neglect ``self.bunchsize`` and request a bunchsize of -1

        Raises
        ------
        KeyError
            if `Velocity` is requested, or if ``self.rsd`` is set (the
            redshift-space shift needs `Velocity`, which is unsupported);
            also if the storage does not return one of the requested columns.
        """
        bunchsize = -1 if full else self.bunchsize

        unique_columns = set(columns)

        # Velocity cannot be read yet, and an RSD shift would require it.
        # Fail before any file is opened or any broadcast happens instead of
        # after the first bunch has already been read.
        if self.rsd is not None or 'Velocity' in unique_columns:
            raise KeyError('Velocity is not yet supported')

        args = dict(posdtype=self.posdtype,
                    veldtype=self.veldtype,
                    massdtype=self.massdtype,
                    iddtype=self.iddtype)

        # only rank 0 builds the storage object; the others receive a copy
        if self.comm.rank == 0:
            datastorage = files.DataStorage(self.path,
                                            files.GadgetGroupTabFile, args)
        else:
            datastorage = None
        datastorage = self.comm.bcast(datastorage)

        for bunch in datastorage.iter(comm=self.comm,
                                      columns=unique_columns,
                                      bunchsize=bunchsize):
            # pair every returned array with its column name; the same set
            # object is iterated for the request and for the pairing
            P = dict(zip(unique_columns, bunch))
            if 'Position' in P:
                P['Position'] /= self.mpch

            yield [P[key] for key in columns]
Exemplo n.º 6
0
    def parallel_read(self, columns, full=False):
        """
        Read halo label columns in parallel, bunch by bunch.

        This is a generator. For every bunch produced by the storage iterator
        it yields a list with one entry per name in `columns`, in the order
        of `columns`; a column the storage did not return becomes ``None``.

        Parameters
        ----------
        columns : sequence of str
            names of the columns to read; passed to the storage unchanged
        full : bool, optional
            if True, neglect ``self.bunchsize`` and request a bunchsize of -1
        """
        chunk = self.bunchsize
        if full:
            chunk = -1

        # Rank 0 alone builds the storage object; every other rank offers a
        # placeholder and picks up the broadcast copy.
        storage = None
        if self.comm.rank == 0:
            storage = files.DataStorage(self.path, files.HaloLabelFile)
        storage = self.comm.bcast(storage)

        bunches = storage.iter(comm=self.comm, columns=columns, bunchsize=chunk)
        for arrays in bunches:
            # look the arrays up by name so the output follows `columns`
            by_name = dict(zip(columns, arrays))
            yield [by_name.get(name) for name in columns]