class ParticleTrajectories:
    r"""A collection of particle trajectories in time over a series of
    datasets.

    Parameters
    ----------
    outputs : ~yt.data_objects.time_series.DatasetSeries
        DatasetSeries object from which to draw the particles.
    indices : array_like
        An integer array of particle indices whose trajectories we
        want to track. If they are not sorted, they will be sorted.
    fields : list of strings, optional
        A set of fields that is retrieved when the trajectory
        collection is instantiated. Default: None (will default to the
        fields 'particle_position_x', 'particle_position_y',
        'particle_position_z')
    suppress_logging : boolean
        Suppress yt's logging when iterating over the simulation time
        series. Default: False
    ptype : str, optional
        Only use this particle type. Default: None, which uses all
        particle types.

    Examples
    --------
    >>> my_fns = glob.glob("orbit_hdf5_chk_00[0-9][0-9]")
    >>> my_fns.sort()
    >>> fields = ["particle_position_x", "particle_position_y",
    ...           "particle_position_z", "particle_velocity_x",
    ...           "particle_velocity_y", "particle_velocity_z"]
    >>> ds = load(my_fns[0])
    >>> init_sphere = ds.sphere(ds.domain_center, (.5, "unitary"))
    >>> indices = init_sphere["particle_index"].astype("int")
    >>> ts = DatasetSeries(my_fns)
    >>> trajs = ts.particle_trajectories(indices, fields=fields)
    >>> for t in trajs:
    ...     print(t["particle_velocity_x"].max(),
    ...           t["particle_velocity_x"].min())
    """

    def __init__(self, outputs, indices, fields=None,
                 suppress_logging=False, ptype=None):
        indices.sort()  # Just in case the caller wasn't careful
        self.field_data = YTFieldData()
        self.data_series = outputs
        self.masks = []
        self.sorts = []
        self.array_indices = []
        self.indices = indices
        self.num_indices = len(indices)
        self.num_steps = len(outputs)
        self.times = []
        self.suppress_logging = suppress_logging
        self.ptype = ptype

        if fields is None:
            fields = []
        fields = list(OrderedDict.fromkeys(fields))

        if self.suppress_logging:
            old_level = int(ytcfg.get("yt", "loglevel"))
            mylog.setLevel(40)

        ds_first = self.data_series[0]
        dd_first = ds_first.all_data()

        fds = {}
        for field in (
            "particle_index",
            "particle_position_x",
            "particle_position_y",
            "particle_position_z",
        ):
            fds[field] = self._get_full_field_name(field)[0]

        my_storage = {}
        pbar = get_pbar("Constructing trajectory information",
                        len(self.data_series))
        for i, (sto, ds) in enumerate(
                self.data_series.piter(storage=my_storage)):
            dd = ds.all_data()
            newtags = dd[fds["particle_index"]].d.astype("int64")
            mask = np.in1d(newtags, indices, assume_unique=True)
            sort = np.argsort(newtags[mask])
            array_indices = np.where(
                np.in1d(indices, newtags, assume_unique=True))[0]
            self.array_indices.append(array_indices)
            self.masks.append(mask)
            self.sorts.append(sort)

            pfields = {}
            for field in (f"particle_position_{ax}" for ax in "xyz"):
                pfields[field] = dd[fds[field]].ndarray_view()[mask][sort]

            sto.result_id = ds.parameter_filename
            sto.result = (ds.current_time, array_indices, pfields)
            pbar.update(i)
        pbar.finish()

        if self.suppress_logging:
            mylog.setLevel(old_level)

        sorted_storage = sorted(my_storage.items())
        times = [time for _fn, (time, *_) in sorted_storage]
        self.times = self.data_series[0].arr(times, times[0].units)

        self.particle_fields = []
        output_field = np.empty((self.num_indices, self.num_steps))
        output_field.fill(np.nan)
        for field in (f"particle_position_{ax}" for ax in "xyz"):
            for i, (_fn, (_time, indices, pfields)) in enumerate(
                    sorted_storage):
                try:
                    # This will fail if particle ids are duplicated,
                    # because the rhs would then have a different shape
                    # than the lhs.
                    output_field[indices, i] = pfields[field]
                except ValueError as e:
                    raise YTIllDefinedParticleData(
                        "This dataset contains duplicate particle indices!"
                    ) from e
            self.field_data[field] = array_like_field(
                dd_first, output_field.copy(), fds[field])
            self.particle_fields.append(field)

        # Instantiate fields the caller requested
        self._get_data(fields)

    def has_key(self, key):
        return key in self.field_data

    def keys(self):
        return self.field_data.keys()

    def _get_full_field_name(self, field):
        ds_first = self.data_series[0]
        dd_first = ds_first.all_data()
        ptype = self.ptype if self.ptype else "all"
        return dd_first._determine_fields((ptype, field))

    def __getitem__(self, key):
        """
        Get the field associated with key.
        """
        if key == "particle_time":
            return self.times
        if key not in self.field_data:
            self._get_data([key])
        return self.field_data[key]

    def __setitem__(self, key, val):
        """
        Sets a field to be some other value.
        """
        self.field_data[key] = val

    def __delitem__(self, key):
        """
        Delete the field from the trajectory.
        """
        del self.field_data[key]

    def __iter__(self):
        """
        This iterates over the trajectories for the different particles,
        returning dicts of fields for each trajectory.
        """
        for idx in range(self.num_indices):
            traj = {}
            traj["particle_index"] = self.indices[idx]
            traj["particle_time"] = self.times
            for field in self.field_data.keys():
                traj[field] = self[field][idx, :]
            yield traj

    def __len__(self):
        """
        The number of individual trajectories.
        """
        return self.num_indices

    def add_fields(self, fields):
        """
        Add a list of fields to an existing trajectory.

        Parameters
        ----------
        fields : list of strings
            A list of fields to be added to the current trajectory
            collection.

        Examples
        --------
        >>> trajs = ParticleTrajectories(my_fns, indices)
        >>> trajs.add_fields(["particle_mass", "particle_gpot"])
        """
        self._get_data(fields)

    def _get_data(self, fields):
        """
        Get a list of fields to include in the trajectory collection.
        The trajectory collection itself is a dict of 2D numpy arrays,
        with shape (num_indices, num_steps).
        """
        missing_fields = [
            field for field in fields if field not in self.field_data
        ]
        if not missing_fields:
            return

        if self.suppress_logging:
            old_level = int(ytcfg.get("yt", "loglevel"))
            mylog.setLevel(40)
        ds_first = self.data_series[0]
        dd_first = ds_first.all_data()

        fds = {}
        new_particle_fields = []
        for field in missing_fields:
            fds[field] = dd_first._determine_fields(field)[0]
            if field not in self.particle_fields:
                if self.data_series[0]._get_field_info(
                        *fds[field]).particle_type:
                    self.particle_fields.append(field)
                    new_particle_fields.append(field)

        grid_fields = [
            field for field in missing_fields
            if field not in self.particle_fields
        ]
        step = int(0)
        pbar = get_pbar(
            f"Generating [{', '.join(missing_fields)}] fields in trajectories",
            self.num_steps,
        )
        my_storage = {}
        for i, (sto, ds) in enumerate(
                self.data_series.piter(storage=my_storage)):
            mask = self.masks[i]
            sort = self.sorts[i]
            pfield = {}
            if new_particle_fields:  # there's at least one particle field
                dd = ds.all_data()
                for field in new_particle_fields:
                    # This is easy... just get the particle fields
                    pfield[field] = dd[fds[field]].d[mask][sort]
            if grid_fields:
                # This is hard... we must loop over grids
                for field in grid_fields:
                    pfield[field] = np.zeros(self.num_indices)
                x = self["particle_position_x"][:, step].d
                y = self["particle_position_y"][:, step].d
                z = self["particle_position_z"][:, step].d
                particle_grids, particle_grid_inds = ds.index._find_points(
                    x, y, z)
                # This will fail for non-grid index objects
                for grid in particle_grids:
                    cube = grid.retrieve_ghost_zones(1, grid_fields)
                    for field in grid_fields:
                        CICSample_3(
                            x, y, z,
                            pfield[field],
                            self.num_indices,
                            cube[fds[field]],
                            np.array(grid.LeftEdge).astype(np.float64),
                            np.array(grid.ActiveDimensions).astype(np.int32),
                            grid.dds[0],
                        )
            sto.result_id = ds.parameter_filename
            sto.result = (self.array_indices[i], pfield)
            pbar.update(step)
            step += 1
        pbar.finish()

        output_field = np.empty((self.num_indices, self.num_steps))
        output_field.fill(np.nan)
        for field in missing_fields:
            fd = fds[field]
            for i, (_fn, (indices, pfield)) in enumerate(
                    sorted(my_storage.items())):
                output_field[indices, i] = pfield[field]
            self.field_data[field] = array_like_field(
                dd_first, output_field.copy(), fd)

        if self.suppress_logging:
            mylog.setLevel(old_level)

    def trajectory_from_index(self, index):
        """
        Retrieve a single trajectory corresponding to a specific particle
        index.

        Parameters
        ----------
        index : int
            This defines which particle trajectory from the
            ParticleTrajectories object will be returned.

        Returns
        -------
        A dictionary corresponding to the particle's trajectory and the
        fields along that trajectory.

        Examples
        --------
        >>> from yt.mods import *
        >>> import matplotlib.pylab as pl
        >>> trajs = ParticleTrajectories(my_fns, indices)
        >>> traj = trajs.trajectory_from_index(indices[0])
        >>> pl.plot(traj["particle_time"], traj["particle_position_x"], "-x")
        >>> pl.savefig("orbit")
        """
        mask = np.in1d(self.indices, (index,), assume_unique=True)
        if not np.any(mask):
            print("The particle index %d is not in the list!" % (index))
            raise IndexError
        fields = [field for field in sorted(self.field_data.keys())]
        traj = {}
        traj["particle_time"] = self.times
        traj["particle_index"] = index
        for field in fields:
            traj[field] = self[field][mask, :][0]
        return traj

    @parallel_root_only
    def write_out(self, filename_base):
        """
        Write out particle trajectories to tab-separated ASCII files (one
        for each trajectory) with the field names in the file header. Each
        file is named with a basename and the index number.

        Parameters
        ----------
        filename_base : string
            The prefix for the output ASCII files.

        Examples
        --------
        >>> trajs = ParticleTrajectories(my_fns, indices)
        >>> trajs.write_out("orbit_trajectory")
        """
        fields = [field for field in sorted(self.field_data.keys())]
        num_fields = len(fields)
        first_str = "# particle_time\t" + "\t".join(fields) + "\n"
        template_str = "%g\t" * num_fields + "%g\n"
        for ix in range(self.num_indices):
            outlines = [first_str]
            for it in range(self.num_steps):
                outlines.append(template_str % tuple(
                    [self.times[it]] +
                    [self[field][ix, it] for field in fields]))
            with open(filename_base + "_%d.dat" % self.indices[ix], "w") as fid:
                fid.writelines(outlines)

    @parallel_root_only
    def write_out_h5(self, filename):
        """
        Write out all the particle trajectories to a single HDF5 file that
        contains the indices, the times, and the 2D array for each field
        individually.

        Parameters
        ----------
        filename : string
            The output filename for the HDF5 file.

        Examples
        --------
        >>> trajs = ParticleTrajectories(my_fns, indices)
        >>> trajs.write_out_h5("orbit_trajectories")
        """
        fid = h5py.File(filename, mode="w")
        fid.create_dataset("particle_indices", dtype=np.int64,
                           data=self.indices)
        fid.close()
        self.times.write_hdf5(filename, dataset_name="particle_times")
        fields = [field for field in sorted(self.field_data.keys())]
        for field in fields:
            self[field].write_hdf5(filename, dataset_name=f"{field}")
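
# The helper below is an illustrative sketch only, not part of the yt API: it
# shows one way to read back the HDF5 file written by
# ParticleTrajectories.write_out_h5 above using plain h5py. The dataset names
# ("particle_indices", "particle_times", and one dataset per field) mirror
# the write calls in that method; the function name itself is hypothetical.
def _read_trajectories_h5_example(filename):
    """Load the arrays written by write_out_h5 into a plain dict."""
    import h5py  # local import so the sketch stays self-contained

    data = {}
    with h5py.File(filename, mode="r") as fid:
        for name in fid:
            # Each top-level dataset is the particle indices, the times, or
            # a (num_indices, num_steps) array for one field.
            data[name] = fid[name][()]
    return data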
class ProfileND(ParallelAnalysisInterface):
    """The profile object class"""

    def __init__(self, data_source, weight_field=None):
        self.data_source = data_source
        self.ds = data_source.ds
        self.field_map = {}
        self.field_info = {}
        self.field_data = YTFieldData()
        if weight_field is not None:
            self.standard_deviation = YTFieldData()
            weight_field = self.data_source._determine_fields(weight_field)[0]
        else:
            self.standard_deviation = None
        self.weight_field = weight_field
        self.field_units = {}
        ParallelAnalysisInterface.__init__(self, comm=data_source.comm)

    @property
    def variance(self):
        issue_deprecation_warning("""
profile.variance incorrectly returns the profile standard deviation and has
been deprecated; use profile.standard_deviation instead.""")
        return self.standard_deviation

    def add_fields(self, fields):
        """Add fields to profile

        Parameters
        ----------
        fields : list of field names
            A list of fields to create profile histograms for
        """
        fields = self.data_source._determine_fields(fields)
        for f in fields:
            self.field_info[f] = self.data_source.ds.field_info[f]
        temp_storage = ProfileFieldAccumulator(len(fields), self.size)
        citer = self.data_source.chunks([], "io")
        for chunk in parallel_objects(citer):
            self._bin_chunk(chunk, fields, temp_storage)
        self._finalize_storage(fields, temp_storage)

    def set_field_unit(self, field, new_unit):
        """Sets a new unit for the requested field

        Parameters
        ----------
        field : string or field tuple
            The name of the field that is to be changed.

        new_unit : string or Unit object
            The name of the new unit.
        """
        if field in self.field_units:
            self.field_units[field] = \
                Unit(new_unit, registry=self.ds.unit_registry)
        else:
            fd = self.field_map[field]
            if fd in self.field_units:
                self.field_units[fd] = \
                    Unit(new_unit, registry=self.ds.unit_registry)
            else:
                raise KeyError("%s not in profile!" % (field))

    def _finalize_storage(self, fields, temp_storage):
        # We use our main comm here
        # This also will fill _field_data

        for i, field in enumerate(fields):
            # q values are returned as q * weight but we want just q
            temp_storage.qvalues[..., i][temp_storage.used] /= \
                temp_storage.weight_values[temp_storage.used]

        # get the profile data from all procs
        all_store = {self.comm.rank: temp_storage}
        all_store = self.comm.par_combine_object(all_store, "join",
                                                 datatype="dict")

        all_val = np.zeros_like(temp_storage.values)
        all_mean = np.zeros_like(temp_storage.mvalues)
        all_std = np.zeros_like(temp_storage.qvalues)
        all_weight = np.zeros_like(temp_storage.weight_values)
        all_used = np.zeros_like(temp_storage.used, dtype="bool")

        # Combine the weighted mean and standard deviation from each
        # processor. For two samples with total weight, mean, and standard
        # deviation given by w, m, and s, their combined mean and variance
        # are:
        #   m12 = (m1 * w1 + m2 * w2) / (w1 + w2)
        #   s12**2 = (w1 * (s1**2 + (m1 - m12)**2) +
        #             w2 * (s2**2 + (m2 - m12)**2)) / (w1 + w2)
        # Here, the mvalues are m and the qvalues are s**2.
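        # A quick numerical check of the identities above (made-up numbers,
        # not taken from any dataset): for
        #   w1, m1, s1 = 2.0, 1.0, 0.5
        #   w2, m2, s2 = 3.0, 2.0, 0.25
        # the combined statistics are
        #   m12 = (1.0 * 2.0 + 2.0 * 3.0) / 5.0 = 1.6
        #   s12**2 = (2.0 * (0.25 + 0.36) + 3.0 * (0.0625 + 0.16)) / 5.0
        #          = 0.3775, so s12 ~ 0.614
        # which is what the in-place updates below accumulate one processor
        # at a time.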
        for p in sorted(all_store.keys()):
            all_used += all_store[p].used
            old_mean = all_mean.copy()
            old_weight = all_weight.copy()
            all_weight[all_store[p].used] += \
                all_store[p].weight_values[all_store[p].used]
            for i, field in enumerate(fields):
                all_val[..., i][all_store[p].used] += \
                    all_store[p].values[..., i][all_store[p].used]
                all_mean[..., i][all_store[p].used] = \
                    (all_mean[..., i] * old_weight +
                     all_store[p].mvalues[..., i] *
                     all_store[p].weight_values)[all_store[p].used] / \
                    all_weight[all_store[p].used]
                all_std[..., i][all_store[p].used] = \
                    (old_weight * (all_std[..., i] +
                                   (old_mean[..., i] - all_mean[..., i])**2) +
                     all_store[p].weight_values *
                     (all_store[p].qvalues[..., i] +
                      (all_store[p].mvalues[..., i] -
                       all_mean[..., i])**2))[all_store[p].used] / \
                    all_weight[all_store[p].used]
        all_std = np.sqrt(all_std)
        del all_store
        self.used = all_used
        blank = ~all_used

        self.weight = all_weight
        self.weight[blank] = 0.0

        for i, field in enumerate(fields):
            if self.weight_field is None:
                self.field_data[field] = \
                    array_like_field(self.data_source, all_val[..., i], field)
            else:
                self.field_data[field] = \
                    array_like_field(self.data_source, all_mean[..., i], field)
                self.standard_deviation[field] = \
                    array_like_field(self.data_source, all_std[..., i], field)
                self.standard_deviation[field][blank] = 0.0
            self.field_data[field][blank] = 0.0
            self.field_units[field] = self.field_data[field].units
            if isinstance(field, tuple):
                self.field_map[field[1]] = field
            else:
                self.field_map[field] = field

    def _bin_chunk(self, chunk, fields, storage):
        raise NotImplementedError

    def _filter(self, bin_fields):
        # cut_points is set to be everything initially, but
        # we also want to apply a filtering based on min/max
        filter = np.ones(bin_fields[0].shape, dtype='bool')
        for (mi, ma), data in zip(self.bounds, bin_fields):
            filter &= (data > mi)
            filter &= (data < ma)
        return filter, [data[filter] for data in bin_fields]

    def _get_data(self, chunk, fields):
        # We are using chunks now, which will manage the field parameters
        # and the like.
        bin_fields = [chunk[bf] for bf in self.bin_fields]
        # We want to make sure that our fields are within the bounds of the
        # binning
        filter, bin_fields = self._filter(bin_fields)
        if not np.any(filter):
            return None
        arr = np.zeros((bin_fields[0].size, len(fields)), dtype="float64")
        for i, field in enumerate(fields):
            units = chunk.ds.field_info[field].output_units
            arr[:, i] = chunk[field][filter].in_units(units)
        if self.weight_field is not None:
            units = chunk.ds.field_info[self.weight_field].output_units
            weight_data = chunk[self.weight_field].in_units(units)
        else:
            weight_data = np.ones(filter.shape, dtype="float64")
        weight_data = weight_data[filter]
        # Return these so they can be passed into the binning routines.
        return arr, weight_data, bin_fields

    def __getitem__(self, field):
        fname = self.field_map.get(field, None)
        if fname is None:
            if isinstance(field, tuple):
                fname = self.field_map.get(field[1], None)
            elif isinstance(field, DerivedField):
                fname = self.field_map.get(field.name[1], None)
        if fname is None:
            raise KeyError(field)
        else:
            if getattr(self, 'fractional', False):
                return self.field_data[fname]
            else:
                return self.field_data[fname].in_units(self.field_units[fname])

    def items(self):
        return [(k, self[k]) for k in self.field_data.keys()]

    def keys(self):
        return self.field_data.keys()

    def __iter__(self):
        # sorted() returns a list; wrap it so __iter__ returns an iterator.
        return iter(sorted(self.items()))

    def _get_bins(self, mi, ma, n, take_log):
        if take_log:
            ret = np.logspace(np.log10(mi), np.log10(ma), n + 1)
            # at this point ret[0] and ret[-1] are not exactly equal to
            # mi and ma due to round-off error.
            # Force them to be mi and ma exactly to avoid incorrectly
            # discarding cells near the edges. See Issue #1300.
            ret[0], ret[-1] = mi, ma
            return ret
        else:
            return np.linspace(mi, ma, n + 1)

    def save_as_dataset(self, filename=None):
        r"""Export a profile to a reloadable yt dataset.

        This function will take a profile and output a dataset
        containing all relevant fields. The resulting dataset can be
        reloaded as a yt dataset.

        Parameters
        ----------
        filename : str, optional
            The name of the file to be written. If None, the name
            will be a combination of the original dataset plus the type
            of object, e.g., Profile1D.

        Returns
        -------
        filename : str
            The name of the file that has been created.

        Examples
        --------
        >>> import yt
        >>> ds = yt.load("enzo_tiny_cosmology/DD0046/DD0046")
        >>> ad = ds.all_data()
        >>> profile = yt.create_profile(ad, ["density", "temperature"],
        ...                             "cell_mass", weight_field=None,
        ...                             n_bins=(128, 128))
        >>> fn = profile.save_as_dataset()
        >>> prof_ds = yt.load(fn)
        >>> print(prof_ds.data["cell_mass"].shape)
        (128, 128)
        >>> print(prof_ds.data["x"].shape)  # x bins as 1D array
        (128,)
        >>> print(prof_ds.data["density"].shape)  # x bins as 2D array
        (128, 128)
        >>> p = yt.PhasePlot(prof_ds.data, "density", "temperature",
        ...                  "cell_mass", weight_field=None)
        >>> p.save()
        """
        keyword = "%s_%s" % (str(self.ds), self.__class__.__name__)
        filename = get_output_filename(filename, keyword, ".h5")

        args = ("field", "log")
        extra_attrs = {
            "data_type": "yt_profile",
            "profile_dimensions": self.size,
            "weight_field": self.weight_field,
            "fractional": self.fractional,
            "accumulation": self.accumulation,
        }
        data = {}
        data.update(self.field_data)
        data["weight"] = self.weight
        data["used"] = self.used.astype("float64")

        dimensionality = 0
        bin_data = []
        for ax in "xyz":
            if hasattr(self, ax):
                dimensionality += 1
                data[ax] = getattr(self, ax)
                bin_data.append(data[ax])
                bin_field_name = "%s_bins" % ax
                data[bin_field_name] = getattr(self, bin_field_name)
                extra_attrs["%s_range" % ax] = self.ds.arr(
                    [data[bin_field_name][0], data[bin_field_name][-1]])
                for arg in args:
                    key = "%s_%s" % (ax, arg)
                    extra_attrs[key] = getattr(self, key)

        bin_fields = np.meshgrid(*bin_data)
        for i, ax in enumerate("xyz"[:dimensionality]):
            data[getattr(self, "%s_field" % ax)] = bin_fields[i]
        extra_attrs["dimensionality"] = dimensionality
        ftypes = dict([(field, "data") for field in data])
        save_as_dataset(self.ds, filename, data, field_types=ftypes,
                        extra_attrs=extra_attrs)
        return filename