Example #1
 def _read_obj_field(self, obj, field, fid_data):
     if fid_data is None: fid_data = (None, None)
     fid, data = fid_data
     if fid is None:
         close = True
         fid = h5py.h5f.open(b(obj.filename), h5py.h5f.ACC_RDONLY)
     else:
         close = False
     if data is None:
         data = np.empty(obj.ActiveDimensions[::-1],
                         dtype=self._field_dtype)
     ftype, fname = field
     try:
         node = "/Grid%08i/%s" % (obj.id, fname)
         dg = h5py.h5d.open(fid, b(node))
     except KeyError:
         if fname == "Dark_Matter_Density":
             data[:] = 0
             return data.T
         raise
     dg.read(h5py.h5s.ALL, h5py.h5s.ALL, data)
     # I don't know why, but on some installations of h5py this works, but
     # on others, nope.  Doesn't seem to be a version thing.
     #dg.close()
     if close:
         fid.close()
     return data.T
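This example uses h5py's low-level interface (h5py.h5f, h5py.h5d, h5py.h5s) rather than the high-level h5py.File API: the file is opened as a raw handle, the dataset is opened by its node path, and the data are read directly into a preallocated numpy buffer. A minimal, standalone sketch of that pattern follows; the helper name read_into and its arguments are illustrative and not part of yt.

import h5py
import numpy as np

def read_into(filename, node, shape, dtype="float64"):
    # The low-level handles expect byte strings, hence the .encode() calls.
    fid = h5py.h5f.open(filename.encode("ascii"), h5py.h5f.ACC_RDONLY)
    try:
        dg = h5py.h5d.open(fid, node.encode("ascii"))
        data = np.empty(shape, dtype=dtype)
        # Read the entire dataset (ALL dataspaces) straight into `data`.
        dg.read(h5py.h5s.ALL, h5py.h5s.ALL, data)
    finally:
        fid.close()
    return data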
Example #2
 def _read_chunk_data(self, chunk, fields):
     fid = fn = None
     rv = {}
     mylog.debug("Preloading fields %s", fields)
     # Split into particles and non-particles
     fluid_fields, particle_fields = [], []
     for ftype, fname in fields:
         if ftype in self.ds.particle_types:
             particle_fields.append((ftype, fname))
         else:
             fluid_fields.append((ftype, fname))
     if len(particle_fields) > 0:
         selector = AlwaysSelector(self.ds)
         rv.update(
             self._read_particle_selection([chunk], selector,
                                           particle_fields))
     if len(fluid_fields) == 0: return rv
     h5_type = self._field_dtype
     for g in chunk.objs:
         rv[g.id] = gf = {}
         if g.id in self._cached_fields:
             rv[g.id].update(self._cached_fields[g.id])
         if g.filename is None: continue
         elif g.filename != fn:
             if fid is not None: fid.close()
             fid = None
         if fid is None:
             fid = h5py.h5f.open(b(g.filename), h5py.h5f.ACC_RDONLY)
             fn = g.filename
         data = np.empty(g.ActiveDimensions[::-1], dtype=h5_type)
         data_view = data.swapaxes(0, -1)
         for field in fluid_fields:
             if field in gf:
                 self._hits += 1
                 continue
             self._misses += 1
             ftype, fname = field
             try:
                 node = "/Grid%08i/%s" % (g.id, fname)
                 dg = h5py.h5d.open(fid, b(node))
             except KeyError:
                 if fname == "Dark_Matter_Density": continue
                 raise
             dg.read(h5py.h5s.ALL, h5py.h5s.ALL, data)
             gf[field] = data_view.copy()
     if fid: fid.close()
     if self._cache_on:
         for gid in rv:
             self._cached_fields.setdefault(gid, {})
             self._cached_fields[gid].update(rv[gid])
     return rv
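A note on the buffer handling above: data is allocated with the grid dimensions reversed because that is how the field is laid out in the HDF5 file, and data.swapaxes(0, -1) re-orients it as a view without copying; the .copy() on the view is what materializes each field before data is reused for the next one. A small numpy sketch of that idiom, with made-up shapes:

import numpy as np

dims = np.array([4, 8, 16])                   # stand-in for g.ActiveDimensions
data = np.empty(dims[::-1], dtype="float64")  # shape (16, 8, 4), disk order
data_view = data.swapaxes(0, -1)              # shape (4, 8, 16); a view, no copy

assert data_view.base is data                 # same underlying buffer
field_array = data_view.copy()                # materialize before reusing data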
Example #3
 def _read_chunk_data(self, chunk, fields):
     fid = fn = None
     rv = {}
     mylog.debug("Preloading fields %s", fields)
     # Split into particles and non-particles
     fluid_fields, particle_fields = [], []
     for ftype, fname in fields:
         if ftype in self.ds.particle_types:
             particle_fields.append((ftype, fname))
         else:
             fluid_fields.append((ftype, fname))
     if len(particle_fields) > 0:
         selector = AlwaysSelector(self.ds)
         rv.update(self._read_particle_selection(
           [chunk], selector, particle_fields))
     if len(fluid_fields) == 0: return rv
     h5_type = self._field_dtype
     for g in chunk.objs:
         rv[g.id] = gf = {}
         if g.id in self._cached_fields:
             rv[g.id].update(self._cached_fields[g.id])
         if g.filename is None: continue
         elif g.filename != fn:
             if fid is not None: fid.close()
             fid = None
         if fid is None:
             fid = h5py.h5f.open(b(g.filename), h5py.h5f.ACC_RDONLY)
             fn = g.filename
         data = np.empty(g.ActiveDimensions[::-1], dtype=h5_type)
         data_view = data.swapaxes(0,2)
         for field in fluid_fields:
             if field in gf:
                 self._hits += 1
                 continue
             self._misses += 1
             ftype, fname = field
             try:
                 node = "/Grid%08i/%s" % (g.id, fname)
                 dg = h5py.h5d.open(fid, b(node))
             except KeyError:
                 if fname == "Dark_Matter_Density": continue
                 raise
             dg.read(h5py.h5s.ALL, h5py.h5s.ALL, data)
             gf[field] = data_view.copy()
     if fid: fid.close()
     if self._cache_on:
         for gid in rv:
             self._cached_fields.setdefault(gid, {})
             self._cached_fields[gid].update(rv[gid])
     return rv
Example #4
    def _get_hosts(self):
        if self.comm.rank == 0 or self.comm.size == 1:

            #Temporary mac hostname fix
            try:
                server_address = socket.gethostname()
                socket.gethostbyname(server_address)
            except socket.gaierror:
                server_address = "localhost"

            sock = socket.socket()
            sock.bind(('', 0))
            port = sock.getsockname()[-1]
            del sock
        else:
            server_address, port = None, None
        self.server_address, self.port = self.comm.mpi_bcast(
            (server_address, port))
        self.server_address = six.b(str(self.server_address))
        self.port = six.b(str(self.port))
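Two things happen on the root rank above: the hostname is checked with gethostbyname and falls back to "localhost" if it does not resolve (the "mac hostname fix"), and binding a socket to port 0 lets the operating system pick a free port. A standalone sketch of that pattern, with an illustrative helper name:

import socket

def pick_host_and_port():
    try:
        host = socket.gethostname()
        socket.gethostbyname(host)   # raises socket.gaierror if unresolvable
    except socket.gaierror:
        host = "localhost"
    sock = socket.socket()
    sock.bind(("", 0))               # port 0: the OS assigns a free port
    port = sock.getsockname()[1]
    sock.close()
    return host, port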
Example #5
def _sanitize_list(flist):
    temp = []
    for item in flist:
        if isinstance(item, string_types):
            temp.append(b(item))
        elif isinstance(item, tuple) and \
                all(isinstance(i, string_types) for i in item):
            temp.append(tuple(_sanitize_list(list(item))))
        else:
            temp.append(item)
    return temp
Example #6
def _sanitize_list(flist):
    temp = []
    for item in flist:
        if isinstance(item, string_types):
            temp.append(b(item))
        elif isinstance(item, tuple) and \
                all(isinstance(i, string_types) for i in item):
            temp.append(tuple(_sanitize_list(list(item))))
        else:
            temp.append(item)
    return temp
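A brief usage sketch for _sanitize_list, assuming b() is the str-to-bytes helper used in the other examples: plain strings, and strings inside tuples of strings, are converted to bytes, while everything else passes through unchanged.

fields = ["density", ("gas", "temperature"), 42]
_sanitize_list(fields)
# -> [b'density', (b'gas', b'temperature'), 42]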
Example #7
 def _read_obj_field(self, obj, field, fid_data):
     if fid_data is None: fid_data = (None, None)
     fid, data = fid_data
     if fid is None:
         close = True
         fid = h5py.h5f.open(b(obj.filename), h5py.h5f.ACC_RDONLY)
     else:
         close = False
     ftype, fname = field
     node = "/%s/field %s" % (obj.block_name, fname)
     dg = h5py.h5d.open(fid, b(node))
     rdata = np.empty(self.ds.grid_dimensions[:self.ds.dimensionality],
                      dtype=self._field_dtype)
     dg.read(h5py.h5s.ALL, h5py.h5s.ALL, rdata)
     if close:
         fid.close()
     data = rdata[self._base].T
     if self.ds.dimensionality < 3:
         nshape = data.shape + (1, ) * (3 - self.ds.dimensionality)
         data = np.reshape(data, nshape)
     return data
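The last few lines pad grids from datasets with fewer than three dimensions by appending axes of length 1, so downstream code can always assume a 3D array. A small numpy sketch of that reshape, with arbitrary shapes:

import numpy as np

data = np.zeros((64, 32))                     # e.g. a 2D field
nshape = data.shape + (1,) * (3 - data.ndim)  # (64, 32, 1)
data3d = np.reshape(data, nshape)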
Example #8
 def __init__(self,
              ts,
              num_readers=1,
              num_writers=None,
              outbase="rockstar_halos",
              particle_type="all",
              force_res=None,
              total_particles=None,
              dm_only=False,
              particle_mass=None,
              min_halo_size=25):
     if is_root():
         mylog.info(
             "The citation for the Rockstar halo finder can be found at")
         mylog.info("http://adsabs.harvard.edu/abs/2013ApJ...762..109B")
     ParallelAnalysisInterface.__init__(self)
     # Decide how we're working.
     if ytcfg.getboolean("yt", "inline") is True:
         self.runner = InlineRunner()
     else:
         self.runner = StandardRunner(num_readers, num_writers)
     self.num_readers = self.runner.num_readers
     self.num_writers = self.runner.num_writers
     mylog.info("Rockstar is using %d readers and %d writers",
                self.num_readers, self.num_writers)
     # Note that Rockstar does not support subvolumes.
     # We assume that all of the snapshots in the time series
     # use the same domain info as the first snapshots.
     if not isinstance(ts, DatasetSeries):
         ts = DatasetSeries([ts])
     self.ts = ts
     self.particle_type = particle_type
     self.outbase = six.b(outbase)
     self.min_halo_size = min_halo_size
     if force_res is None:
         tds = ts[-1]  # Cache a reference
         self.force_res = tds.index.get_smallest_dx().in_units("Mpc/h")
         # We have to delete now to wipe the index
         del tds
     else:
         self.force_res = force_res
     self.total_particles = total_particles
     self.dm_only = dm_only
     self.particle_mass = particle_mass
     # Setup pool and workgroups.
     self.pool, self.workgroup = self.runner.setup_pool()
     p = self._setup_parameters(ts)
     params = self.comm.mpi_bcast(p, root=self.pool['readers'].ranks[0])
     self.__dict__.update(params)
     self.handler = rockstar_interface.RockstarInterface(self.ts)
Example #9
def h5rd(fname, path, dtype=None):
    """ Read Data. Return a dataset located at <path> in file <fname> as
    a numpy array.
    e.g. rd( fname, '/PartType0/Coordinates' ). """

    data = None
    fid = h5py.h5f.open(six.b(fname), h5py.h5f.ACC_RDONLY)
    dg = h5py.h5d.open(fid, path.encode('ascii'))
    if dtype is None:
        dtype = dg.dtype
    data = np.zeros(dg.shape, dtype=dtype)
    dg.read(h5py.h5s.ALL, h5py.h5s.ALL, data)
    fid.close()
    return data
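A round-trip usage sketch for h5rd; the file name and dataset path below are invented for illustration, and the file must already exist because the handle is opened read-only.

import h5py
import numpy as np

with h5py.File("example_snapshot.h5", "w") as f:
    f.create_dataset("/PartType0/Coordinates", data=np.random.rand(10, 3))

coords = h5rd("example_snapshot.h5", "/PartType0/Coordinates")
print(coords.shape)   # (10, 3)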
Example #10
 def io_iter(self, chunks, fields):
     for chunk in chunks:
         fid = None
         filename = -1
         for obj in chunk.objs:
             if obj.filename is None: continue
             if obj.filename != filename:
                 # Note one really important thing here: even if we do
                 # implement LRU caching in the _read_obj_field function,
                 # we'll still be doing file opening and whatnot.  This is a
                 # problem, but one we can return to.
                 if fid is not None:
                     fid.close()
                 fid = h5py.h5f.open(b(obj.filename), h5py.h5f.ACC_RDONLY)
                 filename = obj.filename
             for field in fields:
                 data = None
                 yield field, obj, self._read_obj_field(
                     obj, field, (fid, data))
         if fid is not None:
             fid.close()
Example #11
 def io_iter(self, chunks, fields):
     h5_dtype = self._field_dtype
     for chunk in chunks:
         fid = None
         filename = -1
         for obj in chunk.objs:
             if obj.filename is None: continue
             if obj.filename != filename:
                 # Note one really important thing here: even if we do
                 # implement LRU caching in the _read_obj_field function,
                 # we'll still be doing file opening and whatnot.  This is a
                 # problem, but one we can return to.
                 if fid is not None:
                     fid.close()
                 fid = h5py.h5f.open(b(obj.filename), h5py.h5f.ACC_RDONLY)
                 filename = obj.filename
             for field in fields:
                 nodal_flag = self.ds.field_info[field].nodal_flag
                 dims = obj.ActiveDimensions[::-1] + nodal_flag[::-1]
                 data = np.empty(dims, dtype=h5_dtype)
                 yield field, obj, self._read_obj_field(
                     obj, field, (fid, data))
         if fid is not None:
             fid.close()
Example #12
 def _read_fluid_selection(self, chunks, selector, fields, size):
     rv = {}
     # Now we have to do something unpleasant
     chunks = list(chunks)
     if selector.__class__.__name__ == "GridSelector":
         if not (len(chunks) == len(chunks[0].objs) == 1):
             raise RuntimeError
         g = chunks[0].objs[0]
         f = h5py.File(u(g.filename), 'r')
         if g.id in self._cached_fields:
             gf = self._cached_fields[g.id]
             rv.update(gf)
         if len(rv) == len(fields): return rv
         gds = f.get("/Grid%08i" % g.id)
         for field in fields:
             if field in rv:
                 self._hits += 1
                 continue
             self._misses += 1
             ftype, fname = field
             if fname in gds:
                 rv[(ftype, fname)] = gds.get(fname).value.swapaxes(0,2)
             else:
                 rv[(ftype, fname)] = np.zeros(g.ActiveDimensions)
         if self._cache_on:
             for gid in rv:
                 self._cached_fields.setdefault(gid, {})
                 self._cached_fields[gid].update(rv[gid])
         f.close()
         return rv
     if size is None:
         size = sum((g.count(selector) for chunk in chunks
                     for g in chunk.objs))
     for field in fields:
         ftype, fname = field
         fsize = size
         rv[field] = np.empty(fsize, dtype="float64")
     ng = sum(len(c.objs) for c in chunks)
     mylog.debug("Reading %s cells of %s fields in %s grids",
                size, [f2 for f1, f2 in fields], ng)
     ind = 0
     h5_type = self._field_dtype
     for chunk in chunks:
         fid = None
         for g in chunk.objs:
             if g.filename is None: continue
             if fid is None:
                 fid = h5py.h5f.open(b(g.filename), h5py.h5f.ACC_RDONLY)
             gf = self._cached_fields.get(g.id, {})
             data = np.empty(g.ActiveDimensions[::-1], dtype=h5_type)
             data_view = data.swapaxes(0,2)
             nd = 0
             for field in fields:
                 if field in gf:
                     nd = g.select(selector, gf[field], rv[field], ind)
                     self._hits += 1
                     continue
                 self._misses += 1
                 ftype, fname = field
                 try:
                     node = "/Grid%08i/%s" % (g.id, fname)
                     dg = h5py.h5d.open(fid, b(node))
                 except KeyError:
                     if fname == "Dark_Matter_Density": continue
                     raise
                 dg.read(h5py.h5s.ALL, h5py.h5s.ALL, data)
                 if self._cache_on:
                     self._cached_fields.setdefault(g.id, {})
                     # Copy because it's a view into an empty temp array
                     self._cached_fields[g.id][field] = data_view.copy()
                 nd = g.select(selector, data_view, rv[field], ind) # caches
             ind += nd
         if fid: fid.close()
     return rv
Example #13
 def run(self, block_ratio=1, callbacks=None, restart=False):
     """
     
     """
     if block_ratio != 1:
         raise NotImplementedError
     self._get_hosts()
     # Find restart output number
     num_outputs = len(self.ts)
     if restart:
         restart_file = os.path.join(self.outbase, "restart.cfg")
         if not os.path.exists(restart_file):
             raise RuntimeError("Restart file %s not found" %
                                (restart_file))
         with open(restart_file) as restart_fh:
             for l in restart_fh:
                 if l.startswith("RESTART_SNAP"):
                     restart_num = int(l.split("=")[1])
                 if l.startswith("NUM_WRITERS"):
                     num_writers = int(l.split("=")[1])
         if num_writers != self.num_writers:
             raise RuntimeError(
                 "Number of writers in restart has changed from the original "
                 "run (OLD = %d, NEW = %d).  To avoid problems in the "
                 "restart, choose the same number of writers." % \
                     (num_writers, self.num_writers))
         # Remove the datasets that were already analyzed
         self.ts._pre_outputs = self.ts._pre_outputs[restart_num:]
     else:
         restart_num = 0
     self.handler.setup_rockstar(six.b(self.server_address),
                                 six.b(self.port),
                                 num_outputs,
                                 self.total_particles,
                                 self.particle_type,
                                 particle_mass=self.particle_mass,
                                 parallel=self.comm.size > 1,
                                 num_readers=self.num_readers,
                                 num_writers=self.num_writers,
                                 writing_port=-1,
                                 block_ratio=block_ratio,
                                 outbase=six.b(self.outbase),
                                 force_res=self.force_res,
                                 callbacks=callbacks,
                                 restart_num=restart_num,
                                 min_halo_size=self.min_halo_size)
     # Make the directory to store the halo lists in.
     if not self.outbase:
         self.outbase = os.getcwd()
     if self.comm.rank == 0 and not restart:
         if not os.path.exists(self.outbase):
             os.makedirs(self.outbase)
         # Make a record of which dataset corresponds to which set of
         # output files because it will be easy to lose this connection.
         fp = open(self.outbase + '/datasets.txt', 'w')
         fp.write("# dsname\tindex\n")
         for i, ds in enumerate(self.ts):
             dsloc = path.join(path.relpath(ds.fullpath), ds.basename)
             line = "%s\t%d\n" % (dsloc, i)
             fp.write(line)
         fp.close()
     # This barrier makes sure the directory exists before it might be used.
     self.comm.barrier()
     if self.comm.size == 1:
         self.handler.call_rockstar()
     else:
         # And run it!
         self.runner.run(self.handler, self.workgroup)
     self.comm.barrier()
     self.pool.free_all()