def test_ptype():
    n_particles = 100
    fields = ['particle_position_x', 'particle_position_y', 'particle_position_z',
              'particle_index', 'particle_dummy']
    negative = [False, False, False, False, False]
    units = ['cm', 'cm', 'cm', '1', '1']

    # Setup filters on the 'particle_dummy' field, keeping only the first 50
    @particle_filter(name='dummy', requires=["particle_dummy"])
    def dummy(pfilter, data):
        return data[(pfilter.filtered_type, "particle_dummy")] <= n_particles // 2

    # Setup fake particle datasets with repeated ids. This should work because
    # the ids are unique among `dummy_particles` so let's test this
    data = {'particle_index': np.arange(n_particles) % (n_particles // 2),
            'particle_dummy': np.arange(n_particles)}
    all_ds = [fake_particle_ds(fields=fields, negative=negative, units=units,
                               npart=n_particles, data=data)]
    for ds in all_ds:
        ds.add_particle_filter('dummy')
    ts = DatasetSeries(all_ds)

    # Select all dummy particles
    print(ts[0].derived_field_list)
    ids = ts[0].all_data()['dummy', 'particle_index']

    # Build trajectories
    ts.particle_trajectories(ids, ptype='dummy')
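# Hedged usage sketch (not part of the test above): the same @particle_filter
# pattern applied outside a test harness, using yt's public particle_filter
# decorator and the fake_particle_ds testing helper. The "slow" filter name
# and the velocity cut are illustrative assumptions; fake_particle_ds is
# assumed to provide particle velocities among its default fields.
import numpy as np
from yt import particle_filter
from yt.testing import fake_particle_ds

@particle_filter(name="slow", requires=["particle_velocity_x"])
def slow(pfilter, data):
    # Keep particles whose x-velocity is below the median.
    vx = data[(pfilter.filtered_type, "particle_velocity_x")]
    return vx < np.median(vx)

ds = fake_particle_ds(npart=128)
ds.add_particle_filter("slow")
print(ds.all_data()["slow", "particle_position_x"].shape)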
def test_orbit_traj():
    fields = ["particle_velocity_x", "particle_velocity_y", "particle_velocity_z"]
    my_fns = glob.glob(os.path.join(data_path, "Orbit/orbit_hdf5_chk_00[0-9][0-9]"))
    my_fns.sort()
    ts = DatasetSeries(my_fns)
    ds = ts[0]
    traj = ts.particle_trajectories([1, 2], fields=fields, suppress_logging=True)
    for field in pfields + vfields:
        def field_func(name):
            return traj[field]
        yield GenericArrayTest(ds, field_func, args=[field])
def test_etc_traj():
    fields = ["particle_velocity_x", "particle_velocity_y", "particle_velocity_z"]
    my_fns = glob.glob(os.path.join(data_path,
                                    "enzo_tiny_cosmology/DD000[0-9]/*.hierarchy"))
    my_fns.sort()
    ts = DatasetSeries(my_fns)
    ds = ts[0]
    sp = ds.sphere("max", (0.5, "Mpc"))
    indices = sp["particle_index"][sp["particle_type"] == 1][:5]
    traj = ts.particle_trajectories(indices, fields=fields, suppress_logging=True)
    traj.add_fields(["density"])
    for field in pfields + vfields + ["density"]:
        def field_func(name):
            return traj[field]
        yield GenericArrayTest(ds, field_func, args=[field])
def test_pattern_expansion():
    file_list = [f"fake_data_file_{str(i).zfill(4)}" for i in range(10)]

    with tempfile.TemporaryDirectory() as tmpdir:
        tmp_path = Path(tmpdir)
        for file in file_list:
            (tmp_path / file).touch()

        pattern = tmp_path / "fake_data_file_*"
        expected = [str(tmp_path / file) for file in file_list]

        found = DatasetSeries._get_filenames_from_glob_pattern(pattern)
        assert found == expected

        found2 = DatasetSeries._get_filenames_from_glob_pattern(Path(pattern))
        assert found2 == expected
def test_uniqueness():
    n_particles = 2
    n_steps = 2
    ids = np.arange(n_particles, dtype=int) % (n_particles // 2)
    data = {"particle_index": ids}
    fields = [
        "particle_position_x",
        "particle_position_y",
        "particle_position_z",
        "particle_index",
    ]
    negative = [False, False, False, False]
    units = ["cm", "cm", "cm", "1"]

    ts = DatasetSeries(
        [
            fake_particle_ds(
                fields=fields,
                negative=negative,
                units=units,
                npart=n_particles,
                data=data,
            )
            for i in range(n_steps)
        ]
    )

    assert_raises(YTIllDefinedParticleData, ts.particle_trajectories, [0])
def __getitem__(self, key):
    if isinstance(key, slice):
        if isinstance(key.start, float):
            return self.get_range(key.start, key.stop)
        # This will return a sliced up object!
        return DatasetSeries(self._pre_outputs[key], self.parallel)
    o = self._pre_outputs[key]
    fn, step = o
    o = load(fn, step=step)
    self._setup_function(o)
    return o
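# Quick illustration of the three __getitem__ code paths above (filenames are
# placeholders, and the float-slice form relies on get_range as defined in
# this particular version of the class): an integer index loads one dataset,
# an integer slice returns a new DatasetSeries, and a float-valued slice is
# dispatched to get_range.
from yt.data_objects.time_series import DatasetSeries

ts = DatasetSeries("DD????/DD????")   # glob pattern; nothing is loaded yet
ds0 = ts[0]                           # int index -> a single loaded dataset
every_other = ts[::2]                 # int slice -> a sliced DatasetSeries
early = ts[0.0:1.0]                   # float slice -> self.get_range(0.0, 1.0)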
def __init__(self, ts, num_readers=1, num_writers=None,
             outbase="rockstar_halos", particle_type="all",
             force_res=None, total_particles=None, dm_only=False,
             particle_mass=None, min_halo_size=25):
    if is_root():
        mylog.info("The citation for the Rockstar halo finder can be found at")
        mylog.info("http://adsabs.harvard.edu/abs/2013ApJ...762..109B")
    ParallelAnalysisInterface.__init__(self)
    # Decide how we're working.
    if ytcfg.getboolean("yt", "inline") == True:
        self.runner = InlineRunner()
    else:
        self.runner = StandardRunner(num_readers, num_writers)
    self.num_readers = self.runner.num_readers
    self.num_writers = self.runner.num_writers
    mylog.info("Rockstar is using %d readers and %d writers",
               self.num_readers, self.num_writers)
    # Note that Rockstar does not support subvolumes.
    # We assume that all of the snapshots in the time series
    # use the same domain info as the first snapshots.
    if not isinstance(ts, DatasetSeries):
        ts = DatasetSeries([ts])
    self.ts = ts
    self.particle_type = particle_type
    self.outbase = outbase
    self.min_halo_size = min_halo_size
    if force_res is None:
        tds = ts[-1]  # Cache a reference
        self.force_res = tds.index.get_smallest_dx().in_units("Mpc/h")
        # We have to delete now to wipe the index
        del tds
    else:
        self.force_res = force_res
    self.total_particles = total_particles
    self.dm_only = dm_only
    self.particle_mass = particle_mass
    # Setup pool and workgroups.
    self.pool, self.workgroup = self.runner.setup_pool()
    p = self._setup_parameters(ts)
    params = self.comm.mpi_bcast(p, root=self.pool['readers'].ranks[0])
    self.__dict__.update(params)
    self.handler = rockstar_interface.RockstarInterface(self.ts)
def _fof_method(hc, **finder_kwargs):
    r"""
    Run the FoF halo finding method.
    """
    ds = hc.data_ds
    if isinstance(ds, DatasetSeries):
        ts = ds
    else:
        ts = DatasetSeries([ds])
    for my_ds in ts:
        halo_list = FOFHaloFinder(my_ds, **finder_kwargs)
        _parse_halo_list(hc, halo_list)
def _initialize_dataset(self, ts):
    if not isinstance(ts, DatasetSeries):
        if not iterable(ts):
            ts = [ts]
        ts = DatasetSeries(ts)
    return ts
class ParticleTrajectories(object):
    r"""A collection of particle trajectories in time over a series of
    datasets.

    The ParticleTrajectories object contains a collection of
    particle trajectories for a specified set of particle indices.

    Parameters
    ----------
    outputs : `yt.data_objects.time_series.DatasetSeries` or list of strings
        DatasetSeries object, or a time-sorted list of filenames to
        construct a new DatasetSeries object.
    indices : array_like
        An integer array of particle indices whose trajectories we
        want to track. If they are not sorted they will be sorted.
    fields : list of strings, optional
        A set of fields that is retrieved when the trajectory
        collection is instantiated. Default: None (will default to the
        fields 'particle_position_x', 'particle_position_y',
        'particle_position_z')
    suppress_logging : boolean
        Suppress yt's logging when iterating over the simulation time
        series. Default: False

    Examples
    --------
    >>> from yt.mods import *
    >>> my_fns = glob.glob("orbit_hdf5_chk_00[0-9][0-9]")
    >>> my_fns.sort()
    >>> fields = ["particle_position_x", "particle_position_y",
    ...           "particle_position_z", "particle_velocity_x",
    ...           "particle_velocity_y", "particle_velocity_z"]
    >>> ds = load(my_fns[0])
    >>> init_sphere = ds.sphere(ds.domain_center, (.5, "unitary"))
    >>> indices = init_sphere["particle_index"].astype("int")
    >>> trajs = ParticleTrajectories(my_fns, indices, fields=fields)
    >>> for t in trajs:
    ...     print(t["particle_velocity_x"].max(), t["particle_velocity_x"].min())
    """
    def __init__(self, outputs, indices, fields=None, suppress_logging=False):

        indices.sort()  # Just in case the caller wasn't careful
        self.field_data = YTFieldData()
        if isinstance(outputs, DatasetSeries):
            self.data_series = outputs
        else:
            self.data_series = DatasetSeries(outputs)
        self.masks = []
        self.sorts = []
        self.array_indices = []
        self.indices = indices
        self.num_indices = len(indices)
        self.num_steps = len(outputs)
        self.times = []
        self.suppress_logging = suppress_logging

        # Default fields
        if fields is None:
            fields = []
        fields.append("particle_position_x")
        fields.append("particle_position_y")
        fields.append("particle_position_z")
        fields = list(OrderedDict.fromkeys(fields))

        if self.suppress_logging:
            old_level = int(ytcfg.get("yt", "loglevel"))
            mylog.setLevel(40)

        my_storage = {}
        pbar = get_pbar("Constructing trajectory information",
                        len(self.data_series))
        for i, (sto, ds) in enumerate(self.data_series.piter(storage=my_storage)):
            dd = ds.all_data()
            idx_field = dd._determine_fields("particle_index")[0]
            newtags = dd[idx_field].ndarray_view().astype("int64")
            mask = np.in1d(newtags, indices, assume_unique=True)
            sorts = np.argsort(newtags[mask])
            self.array_indices.append(
                np.where(np.in1d(indices, newtags, assume_unique=True))[0])
            self.masks.append(mask)
            self.sorts.append(sorts)
            sto.result_id = ds.parameter_filename
            sto.result = ds.current_time
            pbar.update(i)
        pbar.finish()

        if self.suppress_logging:
            mylog.setLevel(old_level)

        times = []
        for fn, time in sorted(my_storage.items()):
            times.append(time)
        self.times = self.data_series[0].arr([time for time in times],
                                             times[0].units)

        self.particle_fields = []

        # Instantiate fields the caller requested
        for field in fields:
            self._get_data(field)

    def has_key(self, key):
        return (key in self.field_data)

    def keys(self):
        return self.field_data.keys()

    def __getitem__(self, key):
        """
        Get the field associated with key.
        """
        if key == "particle_time":
            return self.times
        if key not in self.field_data:
            self._get_data(key)
        return self.field_data[key]

    def __setitem__(self, key, val):
        """
        Sets a field to be some other value.
        """
        self.field_data[key] = val

    def __delitem__(self, key):
        """
        Delete the field from the trajectory
        """
        del self.field_data[key]

    def __iter__(self):
        """
        This iterates over the trajectories for
        the different particles, returning dicts
        of fields for each trajectory
        """
        for idx in range(self.num_indices):
            traj = {}
            traj["particle_index"] = self.indices[idx]
            traj["particle_time"] = self.times
            for field in self.field_data.keys():
                traj[field] = self[field][idx, :]
            yield traj

    def __len__(self):
        """
        The number of individual trajectories
        """
        return self.num_indices

    def add_fields(self, fields):
        """
        Add a list of fields to an existing trajectory

        Parameters
        ----------
        fields : list of strings
            A list of fields to be added to the current trajectory
            collection.

        Examples
        --------
        >>> from yt.mods import *
        >>> trajs = ParticleTrajectories(my_fns, indices)
        >>> trajs.add_fields(["particle_mass", "particle_gpot"])
        """
        for field in fields:
            if field not in self.field_data:
                self._get_data(field)

    def _get_data(self, field):
        """
        Get a field to include in the trajectory collection.
        The trajectory collection itself is a dict of 2D numpy arrays,
        with shape (num_indices, num_steps)
        """
        if field not in self.field_data:
            if self.suppress_logging:
                old_level = int(ytcfg.get("yt", "loglevel"))
                mylog.setLevel(40)
            ds_first = self.data_series[0]
            dd_first = ds_first.all_data()
            fd = dd_first._determine_fields(field)[0]
            if field not in self.particle_fields:
                if self.data_series[0].field_info[fd].particle_type:
                    self.particle_fields.append(field)
            particles = np.empty((self.num_indices, self.num_steps))
            particles[:] = np.nan
            step = int(0)
            pbar = get_pbar("Generating field %s in trajectories." % (field),
                            self.num_steps)
            my_storage = {}
            for i, (sto, ds) in enumerate(
                    self.data_series.piter(storage=my_storage)):
                mask = self.masks[i]
                sort = self.sorts[i]
                if field in self.particle_fields:
                    # This is easy... just get the particle fields
                    dd = ds.all_data()
                    pfield = dd[fd].ndarray_view()[mask][sort]
                else:
                    # This is hard... must loop over grids
                    pfield = np.zeros((self.num_indices))
                    x = self["particle_position_x"][:, step].ndarray_view()
                    y = self["particle_position_y"][:, step].ndarray_view()
                    z = self["particle_position_z"][:, step].ndarray_view()
                    # This will fail for non-grid index objects
                    particle_grids, particle_grid_inds = ds.index._find_points(
                        x, y, z)
                    for grid in particle_grids:
                        cube = grid.retrieve_ghost_zones(1, [fd])
                        CICSample_3(
                            x, y, z, pfield, self.num_indices, cube[fd],
                            np.array(grid.LeftEdge).astype(np.float64),
                            np.array(grid.ActiveDimensions).astype(np.int32),
                            grid.dds[0])
                sto.result_id = ds.parameter_filename
                sto.result = (self.array_indices[i], pfield)
                pbar.update(step)
                step += 1
            pbar.finish()
            for i, (fn, (indices, pfield)) in enumerate(sorted(my_storage.items())):
                particles[indices, i] = pfield
            self.field_data[field] = array_like_field(dd_first, particles, fd)
            if self.suppress_logging:
                mylog.setLevel(old_level)
        return self.field_data[field]

    def trajectory_from_index(self, index):
        """
        Retrieve a single trajectory corresponding to a specific particle
        index

        Parameters
        ----------
        index : int
            This defines which particle trajectory from the
            ParticleTrajectories object will be returned.

        Returns
        -------
        A dictionary corresponding to the particle's trajectory and the
        fields along that trajectory

        Examples
        --------
        >>> from yt.mods import *
        >>> import matplotlib.pylab as pl
        >>> trajs = ParticleTrajectories(my_fns, indices)
        >>> traj = trajs.trajectory_from_index(indices[0])
        >>> pl.plot(traj["particle_time"], traj["particle_position_x"], "-x")
        >>> pl.savefig("orbit")
        """
        mask = np.in1d(self.indices, (index,), assume_unique=True)
        if not np.any(mask):
            print("The particle index %d is not in the list!" % (index))
            raise IndexError
        fields = [field for field in sorted(self.field_data.keys())]
        traj = {}
        traj["particle_time"] = self.times
        traj["particle_index"] = index
        for field in fields:
            traj[field] = self[field][mask, :][0]
        return traj

    @parallel_root_only
    def write_out(self, filename_base):
        """
        Write out particle trajectories to tab-separated ASCII files (one
        for each trajectory) with the field names in the file header. Each
        file is named with a basename and the index number.

        Parameters
        ----------
        filename_base : string
            The prefix for the outputted ASCII files.

        Examples
        --------
        >>> from yt.mods import *
        >>> trajs = ParticleTrajectories(my_fns, indices)
        >>> trajs.write_out("orbit_trajectory")
        """
        fields = [field for field in sorted(self.field_data.keys())]
        num_fields = len(fields)
        first_str = "# particle_time\t" + "\t".join(fields) + "\n"
        template_str = "%g\t" * num_fields + "%g\n"
        for ix in range(self.num_indices):
            outlines = [first_str]
            for it in range(self.num_steps):
                outlines.append(
                    template_str % tuple([self.times[it]] +
                                         [self[field][ix, it] for field in fields]))
            fid = open(filename_base + "_%d.dat" % self.indices[ix], "w")
            fid.writelines(outlines)
            fid.close()
            del fid

    @parallel_root_only
    def write_out_h5(self, filename):
        """
        Write out all the particle trajectories to a single HDF5 file
        that contains the indices, the times, and the 2D array for each
        field individually

        Parameters
        ----------
        filename : string
            The output filename for the HDF5 file

        Examples
        --------
        >>> from yt.mods import *
        >>> trajs = ParticleTrajectories(my_fns, indices)
        >>> trajs.write_out_h5("orbit_trajectories")
        """
        fid = h5py.File(filename, "w")
        fields = [field for field in sorted(self.field_data.keys())]
        fid.create_dataset("particle_indices", dtype=np.int32,
                           data=self.indices)
        fid.create_dataset("particle_time", data=self.times)
        for field in fields:
            fid.create_dataset("%s" % field, data=self[field])
        fid.close()
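# Sanity-check sketch for the HDF5 layout written by write_out_h5 above.
# The dataset names ("particle_indices", "particle_time", plus one 2D array
# per field) follow that method; the filename matches its docstring example
# and is otherwise a placeholder.
import h5py

with h5py.File("orbit_trajectories", "r") as f:
    indices = f["particle_indices"][:]
    times = f["particle_time"][:]
    x = f["particle_position_x"][:]   # shape (num_indices, num_steps)
    print(indices.shape, times.shape, x.shape)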
def get_time_series(self, time_data=True, redshift_data=True,
                    initial_time=None, final_time=None,
                    initial_redshift=None, final_redshift=None,
                    initial_cycle=None, final_cycle=None,
                    times=None, redshifts=None, tolerance=None,
                    parallel=True, setup_function=None):

    """
    Instantiate a DatasetSeries object for a set of outputs.

    If no additional keywords given, a DatasetSeries object will be
    created with all potential datasets created by the simulation.

    Outputs can be gathered by specifying a time or redshift range
    (or combination of time and redshift), with a specific list of
    times or redshifts, a range of cycle numbers (for cycle based
    output), or by simply searching all subdirectories within the
    simulation directory.

    time_data : bool
        Whether or not to include time outputs when gathering
        datasets for time series. Default: True.
    redshift_data : bool
        Whether or not to include redshift outputs when gathering
        datasets for time series. Default: True.
    initial_time : tuple of type (float, str)
        The earliest time for outputs to be included. This should be
        given as the value and the string representation of the units.
        For example, (5.0, "Gyr"). If None, the initial time of the
        simulation is used. This can be used in combination with
        either final_time or final_redshift. Default: None.
    final_time : tuple of type (float, str)
        The latest time for outputs to be included. This should be
        given as the value and the string representation of the units.
        For example, (13.7, "Gyr"). If None, the final time of the
        simulation is used. This can be used in combination with
        either initial_time or initial_redshift. Default: None.
    times : tuple of type (float array, str)
        A list of times for which outputs will be found and the units
        of those values. For example, ([0, 1, 2, 3], "s").
        Default: None.
    initial_redshift : float
        The earliest redshift for outputs to be included. If None,
        the initial redshift of the simulation is used. This can be
        used in combination with either final_time or final_redshift.
        Default: None.
    final_redshift : float
        The latest redshift for outputs to be included. If None,
        the final redshift of the simulation is used. This can be
        used in combination with either initial_time or
        initial_redshift. Default: None.
    redshifts : array_like
        A list of redshifts for which outputs will be found.
        Default: None.
    initial_cycle : float
        The earliest cycle for outputs to be included. If None,
        the initial cycle of the simulation is used. This can
        only be used with final_cycle. Default: None.
    final_cycle : float
        The latest cycle for outputs to be included. If None,
        the final cycle of the simulation is used. This can
        only be used in combination with initial_cycle.
        Default: None.
    tolerance : float
        Used in combination with "times" or "redshifts" keywords,
        this is the tolerance within which outputs are accepted
        given the requested times or redshifts. If None, the
        nearest output is always taken. Default: None.
    parallel : bool/int
        If True, the generated DatasetSeries will divide the work
        such that a single processor works on each dataset. If an
        integer is supplied, the work will be divided into that
        number of jobs. Default: True.
    setup_function : callable, accepts a ds
        This function will be called whenever a dataset is loaded.

    Examples
    --------
    >>> import yt
    >>> es = yt.simulation("my_simulation.par", "Enzo")
    >>> es.get_time_series(initial_redshift=10, final_time=(13.7, "Gyr"),
    ...                    redshift_data=False)
    >>> es.get_time_series(redshifts=[3, 2, 1, 0])
    >>> es.get_time_series(final_cycle=100000)
    >>> # after calling get_time_series
    >>> for ds in es.piter():
    ...     p = ProjectionPlot(ds, 'x', "density")
    ...     p.save()
    >>> # An example using the setup_function keyword
    >>> def print_time(ds):
    ...     print(ds.current_time)
    >>> es.get_time_series(setup_function=print_time)
    >>> for ds in es:
    ...     SlicePlot(ds, "x", "Density").save()

    """
    if (initial_redshift is not None or
        final_redshift is not None) and \
        not self.cosmological_simulation:
        raise InvalidSimulationTimeSeries(
            "An initial or final redshift has been given for a " +
            "noncosmological simulation.")

    if time_data and redshift_data:
        my_all_outputs = self.all_outputs
    elif time_data:
        my_all_outputs = self.all_time_outputs
    elif redshift_data:
        my_all_outputs = self.all_redshift_outputs
    else:
        raise InvalidSimulationTimeSeries('Both time_data and redshift_data are False.')

    if not my_all_outputs:
        DatasetSeries.__init__(self, outputs=[], parallel=parallel)
        mylog.info("0 outputs loaded into time series.")
        return

    # Apply selection criteria to the set.
    if times is not None:
        my_outputs = self._get_outputs_by_key("time", times,
                                              tolerance=tolerance,
                                              outputs=my_all_outputs)
    elif redshifts is not None:
        my_outputs = self._get_outputs_by_key("redshift", redshifts,
                                              tolerance=tolerance,
                                              outputs=my_all_outputs)
    elif initial_cycle is not None or final_cycle is not None:
        if initial_cycle is None:
            initial_cycle = 0
        else:
            initial_cycle = max(initial_cycle, 0)
        if final_cycle is None:
            final_cycle = self.parameters['StopCycle']
        else:
            final_cycle = min(final_cycle, self.parameters['StopCycle'])
        my_outputs = my_all_outputs[
            int(ceil(float(initial_cycle) / self.parameters['CycleSkipDataDump'])):
            (final_cycle / self.parameters['CycleSkipDataDump']) + 1]
    else:
        if initial_time is not None:
            if isinstance(initial_time, float):
                initial_time = self.quan(initial_time, "code_time")
            elif isinstance(initial_time, tuple) and len(initial_time) == 2:
                initial_time = self.quan(*initial_time)
            elif not isinstance(initial_time, YTArray):
                raise RuntimeError(
                    "Error: initial_time must be given as a float or " +
                    "tuple of (value, units).")
            my_initial_time = initial_time.in_units("s")
        elif initial_redshift is not None:
            my_initial_time = self.cosmology.t_from_z(initial_redshift)
        else:
            my_initial_time = self.initial_time

        if final_time is not None:
            if isinstance(final_time, float):
                final_time = self.quan(final_time, "code_time")
            elif isinstance(final_time, tuple) and len(final_time) == 2:
                final_time = self.quan(*final_time)
            elif not isinstance(final_time, YTArray):
                raise RuntimeError(
                    "Error: final_time must be given as a float or " +
                    "tuple of (value, units).")
            my_final_time = final_time.in_units("s")
        elif final_redshift is not None:
            my_final_time = self.cosmology.t_from_z(final_redshift)
        else:
            my_final_time = self.final_time

        my_initial_time.convert_to_units("s")
        my_final_time.convert_to_units("s")
        my_times = np.array([a['time'] for a in my_all_outputs])
        my_indices = np.digitize([my_initial_time, my_final_time], my_times)
        if my_initial_time == my_times[my_indices[0] - 1]:
            my_indices[0] -= 1
        my_outputs = my_all_outputs[my_indices[0]:my_indices[1]]

    init_outputs = []
    for output in my_outputs:
        if os.path.exists(output['filename']):
            init_outputs.append(output['filename'])

    DatasetSeries.__init__(self, outputs=init_outputs, parallel=parallel,
                           setup_function=setup_function)
    mylog.info("%d outputs loaded into time series.", len(init_outputs))
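# Short usage sketch of the selection logic above (the parameter file name is
# a placeholder): time bounds are passed as the documented (value, units)
# tuples, and the resulting series is iterated in parallel with piter().
import yt

es = yt.simulation("my_simulation.par", "Enzo")
es.get_time_series(initial_time=(1.0, "Gyr"), final_time=(10.0, "Gyr"),
                   redshift_data=False)
for ds in es.piter():
    print(ds.current_time.in_units("Gyr"))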
def load(*args, **kwargs):
    """
    This function attempts to determine the base data type of a filename or
    other set of arguments by calling
    :meth:`yt.data_objects.static_output.Dataset._is_valid` until it finds a
    match, at which point it returns an instance of the appropriate
    :class:`yt.data_objects.static_output.Dataset` subclass.
    """
    args = _sanitize_load_args(*args)
    candidates = []
    valid_file = []
    for argno, arg in enumerate(args):
        if isinstance(arg, str):
            if os.path.exists(arg):
                valid_file.append(True)
            elif arg.startswith("http"):
                valid_file.append(True)
            else:
                if os.path.exists(os.path.join(ytcfg.get("yt", "test_data_dir"), arg)):
                    valid_file.append(True)
                    args[argno] = os.path.join(ytcfg.get("yt", "test_data_dir"), arg)
                else:
                    valid_file.append(False)
        else:
            valid_file.append(False)

    types_to_check = output_type_registry
    if not any(valid_file):
        try:
            from yt.data_objects.time_series import DatasetSeries
            ts = DatasetSeries.from_filenames(*args, **kwargs)
            return ts
        except (TypeError, YTOutputNotIdentified):
            pass
        # We check if either the first argument is a dict or list, in which
        # case we try identifying candidates.
        if len(args) > 0 and isinstance(args[0], (list, dict)):
            # This fixes issues where it is assumed the first argument is a
            # file
            types_to_check = dict(
                (n, v) for n, v in output_type_registry.items()
                if n.startswith("stream_"))
            # Better way to do this is to override the output_type_registry
        else:
            mylog.error("None of the arguments provided to load() is a valid file")
            mylog.error("Please check that you have used a correct path")
            raise YTOutputNotIdentified(args, kwargs)

    for n, c in types_to_check.items():
        if n is None:
            continue
        if c._is_valid(*args, **kwargs):
            candidates.append(n)

    # convert to classes
    candidates = [output_type_registry[c] for c in candidates]
    # Find only the lowest subclasses, i.e. most specialised front ends
    candidates = find_lowest_subclasses(candidates)
    if len(candidates) == 1:
        return candidates[0](*args, **kwargs)

    if len(candidates) == 0:
        if ytcfg.get("yt", "enzo_db") != '' \
           and len(args) == 1 \
           and isinstance(args[0], str):
            erdb = EnzoRunDatabase()
            fn = erdb.find_uuid(args[0])
            n = "EnzoDataset"
            if n in output_type_registry \
               and output_type_registry[n]._is_valid(fn):
                return output_type_registry[n](fn)
        mylog.error("Couldn't figure out output type for %s", args[0])
        raise YTOutputNotIdentified(args, kwargs)

    mylog.error("Multiple output type candidates for %s:", args[0])
    for c in candidates:
        mylog.error("    Possible: %s", c)
    raise YTOutputNotIdentified(args, kwargs)
def load(fn, *args, **kwargs):
    """
    Load a Dataset or DatasetSeries object.
    The data format is automatically discovered, and the exact return type is the
    corresponding subclass of :class:`yt.data_objects.static_output.Dataset`.
    A :class:`yt.data_objects.time_series.DatasetSeries` is created if the first
    argument is a pattern.

    Parameters
    ----------
    fn : str, os.PathLike, or bytes (types supported by os.path.expanduser)
        A path to the data location. This can be a file name, directory name,
        a glob pattern, or a url (for data types that support it).

    Additional arguments, if any, are passed down to the return class.

    Returns
    -------
    :class:`yt.data_objects.static_output.Dataset` object
        If fn is a single path, create a Dataset from the appropriate subclass.

    :class:`yt.data_objects.time_series.DatasetSeries`
        If fn is a glob pattern (i.e. containing wildcards '[]?!*'), create a series.

    Raises
    ------
    FileNotFoundError
        If fn does not match any existing file or directory.

    yt.utilities.exceptions.YTUnidentifiedDataType
        If fn matches existing files or directories with undetermined format.

    yt.utilities.exceptions.YTAmbiguousDataType
        If the data format matches more than one class of similar
        specialization levels.
    """
    fn = os.path.expanduser(fn)

    if any(wildcard in fn for wildcard in "[]?!*"):
        from yt.data_objects.time_series import DatasetSeries

        return DatasetSeries(fn, *args, **kwargs)

    # Unless the dataset starts with http,
    # look for it using the path or relative to the data dir (in this order).
    if not (os.path.exists(fn) or fn.startswith("http")):
        data_dir = ytcfg.get("yt", "test_data_dir")
        alt_fn = os.path.join(data_dir, fn)
        if os.path.exists(alt_fn):
            fn = alt_fn
        else:
            msg = f"No such file or directory: '{fn}'."
            if os.path.exists(data_dir):
                msg += f"\n(Also tried '{alt_fn}')."
            raise FileNotFoundError(msg)

    candidates = []
    for cls in output_type_registry.values():
        if cls._is_valid(fn, *args, **kwargs):
            candidates.append(cls)

    # Find only the lowest subclasses, i.e. most specialised front ends
    candidates = find_lowest_subclasses(candidates)

    if len(candidates) == 1:
        return candidates[0](fn, *args, **kwargs)

    if len(candidates) > 1:
        raise YTAmbiguousDataType(fn, candidates)

    raise YTUnidentifiedDataType(fn, *args, **kwargs)
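# Minimal sketch of the dispatch implemented above (paths are placeholders):
# a plain path returns a Dataset subclass, while a path containing any of the
# wildcard characters '[]?!*' returns a DatasetSeries.
import yt

ds = yt.load("DD0010/DD0010")     # single output -> Dataset subclass
ts = yt.load("DD????/DD????")     # wildcard pattern -> DatasetSeries
for ds in ts:
    print(ds.current_time)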
def test_init_fake_dataseries():
    file_list = [f"fake_data_file_{str(i).zfill(4)}" for i in range(10)]
    with tempfile.TemporaryDirectory() as tmpdir:
        pfile_list = [Path(tmpdir) / file for file in file_list]
        sfile_list = [str(file) for file in pfile_list]
        for file in pfile_list:
            file.touch()
        pattern = Path(tmpdir) / "fake_data_file_*"

        # init from str pattern
        ts = DatasetSeries(pattern)
        assert ts._pre_outputs == sfile_list

        # init from Path pattern
        ppattern = Path(pattern)
        ts = DatasetSeries(ppattern)
        assert ts._pre_outputs == sfile_list

        # init from str list
        ts = DatasetSeries(sfile_list)
        assert ts._pre_outputs == sfile_list

        # init from Path list
        ts = DatasetSeries(pfile_list)
        assert ts._pre_outputs == pfile_list

        # rejected input type (str repr of a list) "[file1, file2, ...]"
        assert_raises(FileNotFoundError, DatasetSeries, str(file_list))

        # finally, check that ts[0] fails to actually load
        assert_raises(YTUnidentifiedDataType, ts.__getitem__, 0)

        class FakeDataset(Dataset):
            """A minimal loadable fake dataset subclass"""

            @classmethod
            def _is_valid(cls, *args, **kwargs):
                return True

            def _parse_parameter_file(self):
                return

            def _set_code_unit_attributes(self):
                return

            def set_code_units(self):
                self.current_time = 0
                return

            def _hash(self):
                return

            def _setup_classes(self):
                return

        try:
            ds = DatasetSeries(pattern)[0]
            assert isinstance(ds, FakeDataset)

            ts = DatasetSeries(pattern, my_unsupported_kwarg=None)

            assert_raises(TypeError, ts.__getitem__, 0)
            # the exact error message is supposed to be this
            # """__init__() got an unexpected keyword argument 'my_unsupported_kwarg'"""
            # but it's hard to check for within the framework
        finally:
            # tear down to avoid possible breakage in following tests
            output_type_registry.pop("FakeDataset")
def get_time_series(
    self,
    time_data=True,
    redshift_data=True,
    initial_time=None,
    final_time=None,
    initial_redshift=None,
    final_redshift=None,
    initial_cycle=None,
    final_cycle=None,
    times=None,
    redshifts=None,
    tolerance=None,
    parallel=True,
    setup_function=None,
):
    """
    Instantiate a DatasetSeries object for a set of outputs.

    If no additional keywords given, a DatasetSeries object will be
    created with all potential datasets created by the simulation.

    Outputs can be gathered by specifying a time or redshift range
    (or combination of time and redshift), with a specific list of
    times or redshifts, a range of cycle numbers (for cycle based
    output), or by simply searching all subdirectories within the
    simulation directory.

    time_data : bool
        Whether or not to include time outputs when gathering
        datasets for time series. Default: True.
    redshift_data : bool
        Whether or not to include redshift outputs when gathering
        datasets for time series. Default: True.
    initial_time : tuple of type (float, str)
        The earliest time for outputs to be included. This should be
        given as the value and the string representation of the units.
        For example, (5.0, "Gyr"). If None, the initial time of the
        simulation is used. This can be used in combination with
        either final_time or final_redshift. Default: None.
    final_time : tuple of type (float, str)
        The latest time for outputs to be included. This should be
        given as the value and the string representation of the units.
        For example, (13.7, "Gyr"). If None, the final time of the
        simulation is used. This can be used in combination with
        either initial_time or initial_redshift. Default: None.
    times : tuple of type (float array, str)
        A list of times for which outputs will be found and the units
        of those values. For example, ([0, 1, 2, 3], "s").
        Default: None.
    initial_redshift : float
        The earliest redshift for outputs to be included. If None,
        the initial redshift of the simulation is used. This can be
        used in combination with either final_time or final_redshift.
        Default: None.
    final_redshift : float
        The latest redshift for outputs to be included. If None,
        the final redshift of the simulation is used. This can be
        used in combination with either initial_time or
        initial_redshift. Default: None.
    redshifts : array_like
        A list of redshifts for which outputs will be found.
        Default: None.
    initial_cycle : float
        The earliest cycle for outputs to be included. If None,
        the initial cycle of the simulation is used. This can
        only be used with final_cycle. Default: None.
    final_cycle : float
        The latest cycle for outputs to be included. If None,
        the final cycle of the simulation is used. This can
        only be used in combination with initial_cycle.
        Default: None.
    tolerance : float
        Used in combination with "times" or "redshifts" keywords,
        this is the tolerance within which outputs are accepted
        given the requested times or redshifts. If None, the
        nearest output is always taken. Default: None.
    parallel : bool/int
        If True, the generated DatasetSeries will divide the work
        such that a single processor works on each dataset. If an
        integer is supplied, the work will be divided into that
        number of jobs. Default: True.
    setup_function : callable, accepts a ds
        This function will be called whenever a dataset is loaded.

    Examples
    --------

    >>> import yt
    >>> es = yt.load_simulation("enzo_tiny_cosmology/32Mpc_32.enzo", "Enzo")
    >>> es.get_time_series(
    ...     initial_redshift=10, final_time=(13.7, "Gyr"), redshift_data=False
    ... )
    >>> for ds in es:
    ...     print(ds.current_time)
    >>> es.get_time_series(redshifts=[3, 2, 1, 0])
    >>> for ds in es:
    ...     print(ds.current_time)

    """
    if (
        initial_redshift is not None or final_redshift is not None
    ) and not self.cosmological_simulation:
        raise InvalidSimulationTimeSeries(
            "An initial or final redshift has been given for a "
            + "noncosmological simulation."
        )

    if time_data and redshift_data:
        my_all_outputs = self.all_outputs
    elif time_data:
        my_all_outputs = self.all_time_outputs
    elif redshift_data:
        my_all_outputs = self.all_redshift_outputs
    else:
        raise InvalidSimulationTimeSeries(
            "Both time_data and redshift_data are False."
        )

    if not my_all_outputs:
        DatasetSeries.__init__(self, outputs=[], parallel=parallel)
        mylog.info("0 outputs loaded into time series.")
        return

    # Apply selection criteria to the set.
    if times is not None:
        my_outputs = self._get_outputs_by_key(
            "time", times, tolerance=tolerance, outputs=my_all_outputs
        )
    elif redshifts is not None:
        my_outputs = self._get_outputs_by_key(
            "redshift", redshifts, tolerance=tolerance, outputs=my_all_outputs
        )
    elif initial_cycle is not None or final_cycle is not None:
        if initial_cycle is None:
            initial_cycle = 0
        else:
            initial_cycle = max(initial_cycle, 0)

        if final_cycle is None:
            final_cycle = self.parameters["StopCycle"]
        else:
            final_cycle = min(final_cycle, self.parameters["StopCycle"])

        my_outputs = my_all_outputs[
            int(
                np.ceil(float(initial_cycle) / self.parameters["CycleSkipDataDump"])
            ) : (final_cycle / self.parameters["CycleSkipDataDump"]) + 1
        ]
    else:
        if initial_time is not None:
            if isinstance(initial_time, float):
                my_initial_time = self.quan(initial_time, "code_time")
            elif isinstance(initial_time, tuple) and len(initial_time) == 2:
                my_initial_time = self.quan(*initial_time)
            elif not isinstance(initial_time, unyt_array):
                raise RuntimeError(
                    "Error: initial_time must be given as a float or "
                    + "tuple of (value, units)."
                )
        elif initial_redshift is not None:
            my_initial_time = self.cosmology.t_from_z(initial_redshift)
        else:
            my_initial_time = self.initial_time

        if final_time is not None:
            if isinstance(final_time, float):
                my_final_time = self.quan(final_time, "code_time")
            elif isinstance(final_time, tuple) and len(final_time) == 2:
                my_final_time = self.quan(*final_time)
            elif not isinstance(final_time, unyt_array):
                raise RuntimeError(
                    "Error: final_time must be given as a float or "
                    + "tuple of (value, units)."
                )
        elif final_redshift is not None:
            my_final_time = self.cosmology.t_from_z(final_redshift)
        else:
            my_final_time = self.final_time

        my_initial_time.convert_to_units("s")
        my_final_time.convert_to_units("s")
        my_times = np.array([a["time"] for a in my_all_outputs])
        my_indices = np.digitize([my_initial_time, my_final_time], my_times)
        if my_initial_time == my_times[my_indices[0] - 1]:
            my_indices[0] -= 1
        my_outputs = my_all_outputs[my_indices[0] : my_indices[1]]

    init_outputs = []
    for output in my_outputs:
        if os.path.exists(output["filename"]):
            init_outputs.append(output["filename"])

    DatasetSeries.__init__(
        self, outputs=init_outputs, parallel=parallel, setup_function=setup_function
    )
    mylog.info("%d outputs loaded into time series.", len(init_outputs))
def load(fn, *args, **kwargs):
    """
    Load a Dataset or DatasetSeries object.
    The data format is automatically discovered, and the exact return type is the
    corresponding subclass of :class:`yt.data_objects.static_output.Dataset`.
    A :class:`yt.data_objects.time_series.DatasetSeries` is created if the first
    argument is a pattern.

    Parameters
    ----------
    fn : str, os.PathLike, or bytes (types supported by os.path.expanduser)
        A path to the data location. This can be a file name, directory name,
        a glob pattern, or a url (for data types that support it).

    Additional arguments, if any, are passed down to the return class.

    Returns
    -------
    :class:`yt.data_objects.static_output.Dataset` object
        If fn is a single path, create a Dataset from the appropriate subclass.

    :class:`yt.data_objects.time_series.DatasetSeries`
        If fn is a glob pattern (i.e. containing wildcards '[]?!*'), create a series.

    Raises
    ------
    FileNotFoundError
        If fn does not match any existing file or directory.

    yt.utilities.exceptions.YTUnidentifiedDataType
        If fn matches existing files or directories with undetermined format.

    yt.utilities.exceptions.YTAmbiguousDataType
        If the data format matches more than one class of similar
        specialization levels.
    """
    fn = os.path.expanduser(fn)

    if any(wildcard in fn for wildcard in "[]?!*"):
        from yt.data_objects.time_series import DatasetSeries

        return DatasetSeries(fn, *args, **kwargs)

    # This will raise FileNotFoundError if the path isn't matched
    # either in the current dir or yt.config.ytcfg['data_dir_directory']
    if not fn.startswith("http"):
        fn = str(lookup_on_disk_data(fn))

    candidates = []
    for cls in output_type_registry.values():
        if cls._is_valid(fn, *args, **kwargs):
            candidates.append(cls)

    # Find only the lowest subclasses, i.e. most specialised front ends
    candidates = find_lowest_subclasses(candidates)

    if len(candidates) == 1:
        return candidates[0](fn, *args, **kwargs)

    if len(candidates) > 1:
        raise YTAmbiguousDataType(fn, candidates)

    raise YTUnidentifiedDataType(fn, *args, **kwargs)
def load(*args, **kwargs):
    """
    This function attempts to determine the base data type of a filename or
    other set of arguments by calling
    :meth:`yt.data_objects.api.Dataset._is_valid` until it finds a
    match, at which point it returns an instance of the appropriate
    :class:`yt.data_objects.api.Dataset` subclass.
    """
    if len(args) == 0:
        try:
            from yt.extern.six.moves import tkinter
            import tkinter, tkFileDialog
        except ImportError:
            raise YTOutputNotIdentified(args, kwargs)
        root = tkinter.Tk()
        filename = tkFileDialog.askopenfilename(parent=root, title='Choose a file')
        if filename != None:
            return load(filename)
        else:
            raise YTOutputNotIdentified(args, kwargs)

    candidates = []
    args = [os.path.expanduser(arg) if isinstance(arg, str)
            else arg for arg in args]
    valid_file = []
    for argno, arg in enumerate(args):
        if isinstance(arg, str):
            if os.path.exists(arg):
                valid_file.append(True)
            elif arg.startswith("http"):
                valid_file.append(True)
            else:
                if os.path.exists(os.path.join(ytcfg.get("yt", "test_data_dir"), arg)):
                    valid_file.append(True)
                    args[argno] = os.path.join(ytcfg.get("yt", "test_data_dir"), arg)
                else:
                    valid_file.append(False)
        else:
            valid_file.append(False)

    if not any(valid_file):
        try:
            from yt.data_objects.time_series import DatasetSeries
            ts = DatasetSeries.from_filenames(*args, **kwargs)
            return ts
        except YTOutputNotIdentified:
            pass
        mylog.error("None of the arguments provided to load() is a valid file")
        mylog.error("Please check that you have used a correct path")
        raise YTOutputNotIdentified(args, kwargs)

    for n, c in output_type_registry.items():
        if n is None:
            continue
        if c._is_valid(*args, **kwargs):
            candidates.append(n)

    # convert to classes
    candidates = [output_type_registry[c] for c in candidates]
    # Find only the lowest subclasses, i.e. most specialised front ends
    candidates = find_lowest_subclasses(candidates)
    if len(candidates) == 1:
        return candidates[0](*args, **kwargs)

    if len(candidates) == 0:
        if ytcfg.get("yt", "enzo_db") != '' \
           and len(args) == 1 \
           and isinstance(args[0], str):
            erdb = EnzoRunDatabase()
            fn = erdb.find_uuid(args[0])
            n = "EnzoDataset"
            if n in output_type_registry \
               and output_type_registry[n]._is_valid(fn):
                return output_type_registry[n](fn)
        mylog.error("Couldn't figure out output type for %s", args[0])
        raise YTOutputNotIdentified(args, kwargs)

    mylog.error("Multiple output type candidates for %s:", args[0])
    for c in candidates:
        mylog.error("    Possible: %s", c)
    raise YTOutputNotIdentified(args, kwargs)
def read_yt(filename):
    ds = DatasetSeries(filename)
    return YTGlueData(ds)
def __init__(self, outputs, indices, fields=None, suppress_logging=False):
    indices.sort()  # Just in case the caller wasn't careful
    self.field_data = YTFieldData()
    if isinstance(outputs, DatasetSeries):
        self.data_series = outputs
    else:
        self.data_series = DatasetSeries(outputs)
    self.masks = []
    self.sorts = []
    self.array_indices = []
    self.indices = indices
    self.num_indices = len(indices)
    self.num_steps = len(outputs)
    self.times = []
    self.suppress_logging = suppress_logging

    if fields is None:
        fields = []
    fields = list(OrderedDict.fromkeys(fields))

    if self.suppress_logging:
        old_level = int(ytcfg.get("yt", "loglevel"))
        mylog.setLevel(40)

    fds = {}
    ds_first = self.data_series[0]
    dd_first = ds_first.all_data()
    idx_field = dd_first._determine_fields("particle_index")[0]
    for field in ("particle_position_%s" % ax for ax in "xyz"):
        fds[field] = dd_first._determine_fields(field)[0]

    my_storage = {}
    pbar = get_pbar("Constructing trajectory information",
                    len(self.data_series))
    for i, (sto, ds) in enumerate(self.data_series.piter(storage=my_storage)):
        dd = ds.all_data()
        newtags = dd[idx_field].ndarray_view().astype("int64")
        mask = np.in1d(newtags, indices, assume_unique=True)
        sort = np.argsort(newtags[mask])
        array_indices = np.where(
            np.in1d(indices, newtags, assume_unique=True))[0]
        self.array_indices.append(array_indices)
        self.masks.append(mask)
        self.sorts.append(sort)

        pfields = {}
        for field in ("particle_position_%s" % ax for ax in "xyz"):
            pfields[field] = dd[fds[field]].ndarray_view()[mask][sort]

        sto.result_id = ds.parameter_filename
        sto.result = (ds.current_time, array_indices, pfields)
        pbar.update(i)
    pbar.finish()

    if self.suppress_logging:
        mylog.setLevel(old_level)

    times = []
    for fn, (time, indices, pfields) in sorted(my_storage.items()):
        times.append(time)
    self.times = self.data_series[0].arr([time for time in times],
                                         times[0].units)

    self.particle_fields = []
    output_field = np.empty((self.num_indices, self.num_steps))
    output_field.fill(np.nan)
    for field in ("particle_position_%s" % ax for ax in "xyz"):
        for i, (fn, (time, indices, pfields)) in enumerate(
                sorted(my_storage.items())):
            output_field[indices, i] = pfields[field]
        self.field_data[field] = array_like_field(dd_first,
                                                  output_field.copy(),
                                                  fds[field])
        self.particle_fields.append(field)

    # Instantiate fields the caller requested
    self._get_data(fields)
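# For context, this constructor is what backs DatasetSeries.particle_trajectories
# (used by the tests earlier in this section). A minimal, hedged call pattern
# with placeholder file names:
from yt.data_objects.time_series import DatasetSeries

ts = DatasetSeries("orbit_hdf5_chk_00??")
indices = ts[0].all_data()["particle_index"].astype("int64")[:5]
traj = ts.particle_trajectories(indices, fields=["particle_velocity_x"],
                                suppress_logging=True)
traj.add_fields(["particle_velocity_y"])
print(traj["particle_position_x"].shape)   # (num_indices, num_steps)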
def load(*args, **kwargs):
    """
    This function attempts to determine the base data type of a filename or
    other set of arguments by calling
    :meth:`yt.data_objects.api.Dataset._is_valid` until it finds a
    match, at which point it returns an instance of the appropriate
    :class:`yt.data_objects.api.Dataset` subclass.
    """
    if len(args) == 0:
        try:
            import Tkinter, tkFileDialog
        except ImportError:
            raise YTOutputNotIdentified(args, kwargs)
        root = Tkinter.Tk()
        filename = tkFileDialog.askopenfilename(parent=root, title='Choose a file')
        if filename != None:
            return load(filename)
        else:
            raise YTOutputNotIdentified(args, kwargs)

    candidates = []
    args = [os.path.expanduser(arg) if isinstance(arg, types.StringTypes)
            else arg for arg in args]
    valid_file = []
    for argno, arg in enumerate(args):
        if isinstance(arg, types.StringTypes):
            if os.path.exists(arg):
                valid_file.append(True)
            elif arg.startswith("http"):
                valid_file.append(True)
            else:
                if os.path.exists(os.path.join(ytcfg.get("yt", "test_data_dir"), arg)):
                    valid_file.append(True)
                    args[argno] = os.path.join(ytcfg.get("yt", "test_data_dir"), arg)
                else:
                    valid_file.append(False)
        else:
            valid_file.append(False)

    if not any(valid_file):
        try:
            from yt.data_objects.time_series import DatasetSeries
            ts = DatasetSeries.from_filenames(*args, **kwargs)
            return ts
        except YTOutputNotIdentified:
            pass
        mylog.error("None of the arguments provided to load() is a valid file")
        mylog.error("Please check that you have used a correct path")
        raise YTOutputNotIdentified(args, kwargs)

    for n, c in output_type_registry.items():
        if n is None:
            continue
        if c._is_valid(*args, **kwargs):
            candidates.append(n)

    if len(candidates) == 1:
        return output_type_registry[candidates[0]](*args, **kwargs)

    if len(candidates) == 0:
        if ytcfg.get("yt", "enzo_db") != '' \
           and len(args) == 1 \
           and isinstance(args[0], types.StringTypes):
            erdb = EnzoRunDatabase()
            fn = erdb.find_uuid(args[0])
            n = "EnzoDataset"
            if n in output_type_registry \
               and output_type_registry[n]._is_valid(fn):
                return output_type_registry[n](fn)
        mylog.error("Couldn't figure out output type for %s", args[0])
        raise YTOutputNotIdentified(args, kwargs)

    mylog.error("Multiple output type candidates for %s:", args[0])
    for c in candidates:
        mylog.error("    Possible: %s", c)
    raise YTOutputNotIdentified(args, kwargs)
def get_time_series(self, initial_time=None, final_time=None,
                    initial_redshift=None, final_redshift=None,
                    times=None, redshifts=None, tolerance=None,
                    parallel=True, setup_function=None):

    """
    Instantiate a DatasetSeries object for a set of outputs.

    If no additional keywords given, a DatasetSeries object will be
    created with all potential datasets created by the simulation.

    Outputs can be gathered by specifying a time or redshift range
    (or combination of time and redshift), with a specific list of
    times or redshifts, or by simply searching all subdirectories
    within the simulation directory.

    initial_time : tuple of type (float, str)
        The earliest time for outputs to be included. This should be
        given as the value and the string representation of the units.
        For example, (5.0, "Gyr"). If None, the initial time of the
        simulation is used. This can be used in combination with
        either final_time or final_redshift. Default: None.
    final_time : tuple of type (float, str)
        The latest time for outputs to be included. This should be
        given as the value and the string representation of the units.
        For example, (13.7, "Gyr"). If None, the final time of the
        simulation is used. This can be used in combination with
        either initial_time or initial_redshift. Default: None.
    times : tuple of type (float array, str)
        A list of times for which outputs will be found and the units
        of those values. For example, ([0, 1, 2, 3], "s").
        Default: None.
    initial_redshift : float
        The earliest redshift for outputs to be included. If None,
        the initial redshift of the simulation is used. This can be
        used in combination with either final_time or final_redshift.
        Default: None.
    final_redshift : float
        The latest redshift for outputs to be included. If None,
        the final redshift of the simulation is used. This can be
        used in combination with either initial_time or
        initial_redshift. Default: None.
    redshifts : array_like
        A list of redshifts for which outputs will be found.
        Default: None.
    tolerance : float
        Used in combination with "times" or "redshifts" keywords,
        this is the tolerance within which outputs are accepted
        given the requested times or redshifts. If None, the
        nearest output is always taken. Default: None.
    parallel : bool/int
        If True, the generated DatasetSeries will divide the work
        such that a single processor works on each dataset. If an
        integer is supplied, the work will be divided into that
        number of jobs. Default: True.
    setup_function : callable, accepts a ds
        This function will be called whenever a dataset is loaded.

    Examples
    --------
    >>> import yt
    >>> gs = yt.simulation("my_simulation.par", "Gadget")
    >>> gs.get_time_series(initial_redshift=10, final_time=(13.7, "Gyr"))
    >>> gs.get_time_series(redshifts=[3, 2, 1, 0])
    >>> # after calling get_time_series
    >>> for ds in gs.piter():
    ...     p = ProjectionPlot(ds, "x", "density")
    ...     p.save()
    >>> # An example using the setup_function keyword
    >>> def print_time(ds):
    ...     print(ds.current_time)
    >>> gs.get_time_series(setup_function=print_time)
    >>> for ds in gs:
    ...     SlicePlot(ds, "x", "Density").save()

    """
    if (initial_redshift is not None or
        final_redshift is not None) and \
        not self.cosmological_simulation:
        raise InvalidSimulationTimeSeries(
            "An initial or final redshift has been given for a " +
            "noncosmological simulation.")

    my_all_outputs = self.all_outputs
    if not my_all_outputs:
        DatasetSeries.__init__(self, outputs=[], parallel=parallel,
                               unit_base=self.unit_base)
        mylog.info("0 outputs loaded into time series.")
        return

    # Apply selection criteria to the set.
    if times is not None:
        my_outputs = self._get_outputs_by_key("time", times,
                                              tolerance=tolerance,
                                              outputs=my_all_outputs)
    elif redshifts is not None:
        my_outputs = self._get_outputs_by_key("redshift", redshifts,
                                              tolerance=tolerance,
                                              outputs=my_all_outputs)
    else:
        if initial_time is not None:
            if isinstance(initial_time, float):
                initial_time = self.quan(initial_time, "code_time")
            elif isinstance(initial_time, tuple) and len(initial_time) == 2:
                initial_time = self.quan(*initial_time)
            elif not isinstance(initial_time, unyt_array):
                raise RuntimeError(
                    "Error: initial_time must be given as a float or " +
                    "tuple of (value, units).")
            my_initial_time = initial_time.in_units("s")
        elif initial_redshift is not None:
            my_initial_time = self.cosmology.t_from_z(initial_redshift)
        else:
            my_initial_time = self.initial_time

        if final_time is not None:
            if isinstance(final_time, float):
                final_time = self.quan(final_time, "code_time")
            elif isinstance(final_time, tuple) and len(final_time) == 2:
                final_time = self.quan(*final_time)
            elif not isinstance(final_time, unyt_array):
                raise RuntimeError(
                    "Error: final_time must be given as a float or " +
                    "tuple of (value, units).")
            my_final_time = final_time.in_units("s")
        elif final_redshift is not None:
            my_final_time = self.cosmology.t_from_z(final_redshift)
        else:
            my_final_time = self.final_time

        my_initial_time.convert_to_units("s")
        my_final_time.convert_to_units("s")
        my_times = np.array([a["time"] for a in my_all_outputs])
        my_indices = np.digitize([my_initial_time, my_final_time], my_times)
        if my_initial_time == my_times[my_indices[0] - 1]:
            my_indices[0] -= 1
        my_outputs = my_all_outputs[my_indices[0]:my_indices[1]]

    init_outputs = []
    for output in my_outputs:
        if os.path.exists(output["filename"]):
            init_outputs.append(output["filename"])
    if len(init_outputs) == 0 and len(my_outputs) > 0:
        mylog.warning("Could not find any datasets.  " +
                      "Check the value of OutputDir in your parameter file.")

    DatasetSeries.__init__(self, outputs=init_outputs, parallel=parallel,
                           setup_function=setup_function,
                           unit_base=self.unit_base)
    mylog.info("%d outputs loaded into time series.", len(init_outputs))
if not os.path.exists('%s/rockstar/' % sim.path()):
    os.mkdir('%s/rockstar/' % sim.path())

# cd to rockstar/
os.chdir('rockstar/')
print('In dir: %s' % os.getcwd())
print('Starting rockstar...')

outputs = np.arange(1, sim.num_snapshots() + 1)
dirs = []

# Add the datasets
for ioutput in outputs:
    # ds = yt.load('../output_%05d/info_%05d.txt' % (ioutput, ioutput))
    ds = sim.snapshot(ioutput, module='yt').raw_snapshot()
    # assert(ds.add_particle_filter("dark_matter"))
    dirs.append(ds)

# es = yt.load('../output_*/info_*.txt')
es = DatasetSeries(dirs, setup_function=setup_ds)
# es = DatasetSeries(dirs)

readers = int(ncpu / 4.)  # Reserve one cpu for the server
writers = ncpu - readers - 1
print('Running rockstar with %i writers and %i readers' % (writers, readers))

rh = RockstarHaloFinder(es, num_readers=readers, num_writers=writers,
                        particle_type="dark_matter")
rh.run()
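# The script above assumes a "dark_matter" particle filter is already
# registered (note the commented-out add_particle_filter call) and that a
# setup_ds hook exists. A hedged sketch of one way both could be defined;
# the particle_type code value (== 1) is dataset dependent and is an
# assumption, not taken from the script.
import yt

def dark_matter(pfilter, data):
    return data[(pfilter.filtered_type, "particle_type")] == 1

yt.add_particle_filter("dark_matter", function=dark_matter,
                       filtered_type="all", requires=["particle_type"])

def setup_ds(ds):
    # setup_function hook passed to DatasetSeries in the script above
    ds.add_particle_filter("dark_matter")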