Example #1
class ParticleTrajectories:
    r"""A collection of particle trajectories in time over a series of
    datasets.

    Parameters
    ----------
    outputs : ~yt.data_objects.time_series.DatasetSeries
        DatasetSeries object from which to draw the particles.
    indices : array_like
        An integer array of particle indices whose trajectories we
        want to track. If they are not already sorted, they will be
        sorted in place.
    fields : list of strings, optional
        A set of fields that is retrieved when the trajectory
        collection is instantiated. Default: None (will default
        to the fields 'particle_position_x', 'particle_position_y',
        'particle_position_z')
    suppress_logging : bool, optional
        Suppress yt's logging when iterating over the simulation time
        series. Default: False
    ptype : str, optional
        Only use this particle type. Default: None, which uses all
        particle types.

    Examples
    --------
    >>> import glob
    >>> from yt import load, DatasetSeries
    >>> my_fns = glob.glob("orbit_hdf5_chk_00[0-9][0-9]")
    >>> my_fns.sort()
    >>> fields = ["particle_position_x", "particle_position_y",
    ...           "particle_position_z", "particle_velocity_x",
    ...           "particle_velocity_y", "particle_velocity_z"]
    >>> ds = load(my_fns[0])
    >>> init_sphere = ds.sphere(ds.domain_center, (0.5, "unitary"))
    >>> indices = init_sphere["particle_index"].astype("int")
    >>> ts = DatasetSeries(my_fns)
    >>> trajs = ts.particle_trajectories(indices, fields=fields)
    >>> for t in trajs:
    ...     print(t["particle_velocity_x"].max(), t["particle_velocity_x"].min())
    """
    def __init__(self,
                 outputs,
                 indices,
                 fields=None,
                 suppress_logging=False,
                 ptype=None):

        indices.sort()  # Just in case the caller wasn't careful
        self.field_data = YTFieldData()
        self.data_series = outputs
        self.masks = []
        self.sorts = []
        self.array_indices = []
        self.indices = indices
        self.num_indices = len(indices)
        self.num_steps = len(outputs)
        self.times = []
        self.suppress_logging = suppress_logging
        self.ptype = ptype

        if fields is None:
            fields = []
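        # Remove duplicate fields while preserving the caller's order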
        fields = list(OrderedDict.fromkeys(fields))

        if self.suppress_logging:
            old_level = int(ytcfg.get("yt", "loglevel"))
            mylog.setLevel(40)
        ds_first = self.data_series[0]
        dd_first = ds_first.all_data()

        fds = {}
        for field in (
                "particle_index",
                "particle_position_x",
                "particle_position_y",
                "particle_position_z",
        ):
            fds[field] = self._get_full_field_name(field)[0]

        my_storage = {}
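        # piter() distributes the outputs among processors; each one fills
        # my_storage with its results, keyed by result_id, and the combined
        # dict is available everywhere after the loop.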
        pbar = get_pbar("Constructing trajectory information",
                        len(self.data_series))
        for i, (sto,
                ds) in enumerate(self.data_series.piter(storage=my_storage)):
            dd = ds.all_data()
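            # For this output: `mask` selects the tracked particles among
            # all particles present, `sort` reorders the selected particles
            # by particle id, and `array_indices` gives each tracked
            # particle's row in the sorted `indices` array, so fields read
            # here can be scattered into the right trajectory rows later.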
            newtags = dd[fds["particle_index"]].d.astype("int64")
            mask = np.in1d(newtags, indices, assume_unique=True)
            sort = np.argsort(newtags[mask])
            array_indices = np.where(
                np.in1d(indices, newtags, assume_unique=True))[0]
            self.array_indices.append(array_indices)
            self.masks.append(mask)
            self.sorts.append(sort)

            pfields = {}
            for field in (f"particle_position_{ax}" for ax in "xyz"):
                pfields[field] = dd[fds[field]].ndarray_view()[mask][sort]

            sto.result_id = ds.parameter_filename
            sto.result = (ds.current_time, array_indices, pfields)
            pbar.update(i)
        pbar.finish()

        if self.suppress_logging:
            mylog.setLevel(old_level)

        sorted_storage = sorted(my_storage.items())
        times = [time for _fn, (time, *_) in sorted_storage]
        self.times = self.data_series[0].arr(times, times[0].units)

        self.particle_fields = []
        output_field = np.empty((self.num_indices, self.num_steps))
        output_field.fill(np.nan)
        for field in (f"particle_position_{ax}" for ax in "xyz"):
            for i, (_fn, (_time, indices,
                          pfields)) in enumerate(sorted_storage):
                try:
                    # This will fail if particle ids are duplicated,
                    # because the rhs would then have a different shape
                    # than the lhs
                    output_field[indices, i] = pfields[field]
                except ValueError as e:
                    raise YTIllDefinedParticleData(
                        "This dataset contains duplicate particle indices!"
                    ) from e
            self.field_data[field] = array_like_field(dd_first,
                                                      output_field.copy(),
                                                      fds[field])
            self.particle_fields.append(field)

        # Instantiate fields the caller requested
        self._get_data(fields)

    def has_key(self, key):
        return key in self.field_data

    def keys(self):
        return self.field_data.keys()

    def _get_full_field_name(self, field):
        ds_first = self.data_series[0]
        dd_first = ds_first.all_data()
        ptype = self.ptype if self.ptype else "all"
        return dd_first._determine_fields((ptype, field))

    def __getitem__(self, key):
        """
        Get the field associated with key.
        """
        if key == "particle_time":
            return self.times
        if key not in self.field_data:
            self._get_data([key])
        return self.field_data[key]

    def __setitem__(self, key, val):
        """
        Sets a field to be some other value.
        """
        self.field_data[key] = val

    def __delitem__(self, key):
        """
        Delete the field from the trajectory
        """
        del self.field_data[key]

    def __iter__(self):
        """
        This iterates over the trajectories for
        the different particles, returning dicts
        of fields for each trajectory
        """
        for idx in range(self.num_indices):
            traj = {}
            traj["particle_index"] = self.indices[idx]
            traj["particle_time"] = self.times
            for field in self.field_data.keys():
                traj[field] = self[field][idx, :]
            yield traj

    def __len__(self):
        """
        The number of individual trajectories
        """
        return self.num_indices

    def add_fields(self, fields):
        """
        Add a list of fields to an existing trajectory

        Parameters
        ----------
        fields : list of strings
            A list of fields to be added to the current trajectory
            collection.

        Examples
        --------
        >>> trajs = ParticleTrajectories(my_fns, indices)
        >>> trajs.add_fields(["particle_mass", "particle_gpot"])
        """
        self._get_data(fields)

    def _get_data(self, fields):
        """
        Get a list of fields to include in the trajectory collection.
        The trajectory collection itself is a dict of 2D numpy arrays,
        with shape (num_indices, num_steps)
        """

        missing_fields = [
            field for field in fields if field not in self.field_data
        ]
        if not missing_fields:
            return

        if self.suppress_logging:
            old_level = int(ytcfg.get("yt", "loglevel"))
            mylog.setLevel(40)
        ds_first = self.data_series[0]
        dd_first = ds_first.all_data()

        fds = {}
        new_particle_fields = []
        for field in missing_fields:
            fds[field] = dd_first._determine_fields(field)[0]
            if field not in self.particle_fields:
                if self.data_series[0]._get_field_info(
                        *fds[field]).particle_type:
                    self.particle_fields.append(field)
                    new_particle_fields.append(field)

        grid_fields = [
            field for field in missing_fields
            if field not in self.particle_fields
        ]
        pbar = get_pbar(
            f"Generating [{', '.join(missing_fields)}] fields in trajectories",
            self.num_steps,
        )
        my_storage = {}

        for i, (sto,
                ds) in enumerate(self.data_series.piter(storage=my_storage)):
            mask = self.masks[i]
            sort = self.sorts[i]
            pfield = {}

            if new_particle_fields:  # there's at least one particle field
                dd = ds.all_data()
                for field in new_particle_fields:
                    # This is easy... just get the particle fields
                    pfield[field] = dd[fds[field]].d[mask][sort]

            if grid_fields:
                # This is hard... must loop over grids
                for field in grid_fields:
                    pfield[field] = np.zeros(self.num_indices)
                x = self["particle_position_x"][:, step].d
                y = self["particle_position_y"][:, step].d
                z = self["particle_position_z"][:, step].d
                particle_grids, particle_grid_inds = ds.index._find_points(
                    x, y, z)
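                # _find_points returns, for each (x, y, z) position, the
                # grid containing it; CICSample_3 then deposits each grid
                # field onto the particle positions with cloud-in-cell
                # interpolation.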

                # This will fail for non-grid index objects
                for grid in particle_grids:
                    cube = grid.retrieve_ghost_zones(1, grid_fields)
                    for field in grid_fields:
                        CICSample_3(
                            x,
                            y,
                            z,
                            pfield[field],
                            self.num_indices,
                            cube[fds[field]],
                            np.array(grid.LeftEdge).astype(np.float64),
                            np.array(grid.ActiveDimensions).astype(np.int32),
                            grid.dds[0],
                        )
            sto.result_id = ds.parameter_filename
            sto.result = (self.array_indices[i], pfield)
            pbar.update(i)
        pbar.finish()

        output_field = np.empty((self.num_indices, self.num_steps))
        output_field.fill(np.nan)
        for field in missing_fields:
            fd = fds[field]
            for i, (_fn, (indices,
                          pfield)) in enumerate(sorted(my_storage.items())):
                output_field[indices, i] = pfield[field]
            self.field_data[field] = array_like_field(dd_first,
                                                      output_field.copy(), fd)

        if self.suppress_logging:
            mylog.setLevel(old_level)

    def trajectory_from_index(self, index):
        """
        Retrieve a single trajectory corresponding to a specific particle
        index

        Parameters
        ----------
        index : int
            This defines which particle trajectory from the
            ParticleTrajectories object will be returned.

        Returns
        -------
        traj : dict
            A dictionary corresponding to the particle's trajectory and the
            fields along that trajectory.

        Examples
        --------
        >>> import matplotlib.pyplot as plt
        >>> trajs = ParticleTrajectories(my_fns, indices)
        >>> traj = trajs.trajectory_from_index(indices[0])
        >>> plt.plot(traj["particle_time"], traj["particle_position_x"], "-x")
        >>> plt.savefig("orbit")
        """
        mask = np.in1d(self.indices, (index,), assume_unique=True)
        if not np.any(mask):
            raise IndexError(f"The particle index {index} is not in the list!")
        fields = sorted(self.field_data.keys())
        traj = {}
        traj["particle_time"] = self.times
        traj["particle_index"] = index
        for field in fields:
            traj[field] = self[field][mask, :][0]
        return traj

    @parallel_root_only
    def write_out(self, filename_base):
        """
        Write out particle trajectories to tab-separated ASCII files (one
        for each trajectory) with the field names in the file header. Each
        file is named with a basename and the index number.

        Parameters
        ----------
        filename_base : string
            The prefix for the output ASCII files.

        Examples
        --------
        >>> trajs = ParticleTrajectories(my_fns, indices)
        >>> trajs.write_out("orbit_trajectory")
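
        This writes one file per particle, named with the basename plus
        the particle index, e.g. orbit_trajectory_10.dat for the particle
        with index 10.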
        """
        fields = sorted(self.field_data.keys())
        num_fields = len(fields)
        first_str = "# particle_time\t" + "\t".join(fields) + "\n"
        template_str = "%g\t" * num_fields + "%g\n"
        for ix in range(self.num_indices):
            outlines = [first_str]
            for it in range(self.num_steps):
                outlines.append(
                    template_str %
                    tuple([self.times[it]] +
                          [self[field][ix, it] for field in fields]))
            with open(filename_base + "_%d.dat" % self.indices[ix], "w") as fid:
                fid.writelines(outlines)

    @parallel_root_only
    def write_out_h5(self, filename):
        """
        Write out all the particle trajectories to a single HDF5 file
        that contains the indices, the times, and the 2D array for each
        field individually

        Parameters
        ----------

        filename : string
            The output filename for the HDF5 file

        Examples
        --------
        >>> trajs = ParticleTrajectories(my_fns, indices)
        >>> trajs.write_out_h5("orbit_trajectories")
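
        The resulting file contains the datasets "particle_indices" and
        "particle_times", plus one 2D dataset per field.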
        """
        with h5py.File(filename, mode="w") as fid:
            fid.create_dataset("particle_indices",
                               dtype=np.int64,
                               data=self.indices)
        self.times.write_hdf5(filename, dataset_name="particle_times")
        fields = sorted(self.field_data.keys())
        for field in fields:
            self[field].write_hdf5(filename, dataset_name=field)
Example #2
class ProfileND(ParallelAnalysisInterface):
    """The profile object class"""
    def __init__(self, data_source, weight_field=None):
        self.data_source = data_source
        self.ds = data_source.ds
        self.field_map = {}
        self.field_info = {}
        self.field_data = YTFieldData()
        if weight_field is not None:
            self.standard_deviation = YTFieldData()
            weight_field = self.data_source._determine_fields(weight_field)[0]
        else:
            self.standard_deviation = None
        self.weight_field = weight_field
        self.field_units = {}
        ParallelAnalysisInterface.__init__(self, comm=data_source.comm)

    @property
    def variance(self):
        issue_deprecation_warning("""
profile.variance incorrectly returns the profile standard deviation and has 
been deprecated, use profile.standard_deviation instead.""")
        return self.standard_deviation

    def add_fields(self, fields):
        """Add fields to profile

        Parameters
        ----------
        fields : list of field names
            A list of fields to create profile histograms for
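
        Examples
        --------
        A minimal sketch; the dataset path and the added field are
        assumptions, not part of this class:

        >>> import yt
        >>> ds = yt.load("enzo_tiny_cosmology/DD0046/DD0046")
        >>> ad = ds.all_data()
        >>> profile = yt.create_profile(ad, ["density"], ["temperature"])
        >>> profile.add_fields([("gas", "velocity_magnitude")])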
        
        """
        fields = self.data_source._determine_fields(fields)
        for f in fields:
            self.field_info[f] = self.data_source.ds.field_info[f]
        temp_storage = ProfileFieldAccumulator(len(fields), self.size)
        citer = self.data_source.chunks([], "io")
        for chunk in parallel_objects(citer):
            self._bin_chunk(chunk, fields, temp_storage)
        self._finalize_storage(fields, temp_storage)

    def set_field_unit(self, field, new_unit):
        """Sets a new unit for the requested field

        Parameters
        ----------
        field : string or field tuple
           The name of the field that is to be changed.

        new_unit : string or Unit object
           The name of the new unit.
        """
        if field in self.field_units:
            self.field_units[field] = \
                Unit(new_unit, registry=self.ds.unit_registry)
        else:
            fd = self.field_map[field]
            if fd in self.field_units:
                self.field_units[fd] = \
                    Unit(new_unit, registry=self.ds.unit_registry)
            else:
                raise KeyError("%s not in profile!" % (field))

    def _finalize_storage(self, fields, temp_storage):
        # We use our main comm here
        # This also fills self.field_data

        for i, field in enumerate(fields):
            # q values are returned as q * weight but we want just q
            temp_storage.qvalues[..., i][temp_storage.used] /= \
              temp_storage.weight_values[temp_storage.used]

        # get the profile data from all procs
        all_store = {self.comm.rank: temp_storage}
        all_store = self.comm.par_combine_object(all_store,
                                                 "join",
                                                 datatype="dict")

        all_val = np.zeros_like(temp_storage.values)
        all_mean = np.zeros_like(temp_storage.mvalues)
        all_std = np.zeros_like(temp_storage.qvalues)
        all_weight = np.zeros_like(temp_storage.weight_values)
        all_used = np.zeros_like(temp_storage.used, dtype="bool")

        # Combine the weighted mean and standard deviation from each processor.
        # For two samples with total weights, means, and standard deviations
        # given by (w1, m1, s1) and (w2, m2, s2), the combined mean and
        # variance are:
        # m12 = (m1 * w1 + m2 * w2) / (w1 + w2)
        # s12**2 = (w1 * (s1**2 + (m1 - m12)**2) +
        #           w2 * (s2**2 + (m2 - m12)**2)) / (w1 + w2)
        # Here, the mvalues are m and the qvalues are s**2.
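        # Sanity check of the update rule: w1 = 2, m1 = 1, s1 = 0 and
        # w2 = 2, m2 = 3, s2 = 0 give m12 = 2 and
        # s12**2 = (2 * (0 + 1) + 2 * (0 + 1)) / 4 = 1, which is the
        # variance of [1, 1, 3, 3] computed directly.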
        for p in sorted(all_store.keys()):
            all_used += all_store[p].used
            old_mean = all_mean.copy()
            old_weight = all_weight.copy()
            all_weight[all_store[p].used] += \
              all_store[p].weight_values[all_store[p].used]
            for i, field in enumerate(fields):
                all_val[..., i][all_store[p].used] += \
                  all_store[p].values[..., i][all_store[p].used]

                all_mean[..., i][all_store[p].used] = \
                  (all_mean[..., i] * old_weight +
                   all_store[p].mvalues[..., i] *
                   all_store[p].weight_values)[all_store[p].used] / \
                   all_weight[all_store[p].used]

                all_std[..., i][all_store[p].used] = \
                  (old_weight * (all_std[..., i] +
                                 (old_mean[..., i] - all_mean[..., i])**2) +
                   all_store[p].weight_values *
                   (all_store[p].qvalues[..., i] +
                    (all_store[p].mvalues[..., i] -
                     all_mean[..., i])**2))[all_store[p].used] / \
                    all_weight[all_store[p].used]

        all_std = np.sqrt(all_std)
        del all_store
        self.used = all_used
        blank = ~all_used

        self.weight = all_weight
        self.weight[blank] = 0.0

        for i, field in enumerate(fields):
            if self.weight_field is None:
                self.field_data[field] = \
                  array_like_field(self.data_source,
                                   all_val[...,i], field)
            else:
                self.field_data[field] = \
                  array_like_field(self.data_source,
                                   all_mean[...,i], field)
                self.standard_deviation[field] = \
                  array_like_field(self.data_source,
                                   all_std[...,i], field)
                self.standard_deviation[field][blank] = 0.0
            self.field_data[field][blank] = 0.0
            self.field_units[field] = self.field_data[field].units
            if isinstance(field, tuple):
                self.field_map[field[1]] = field
            else:
                self.field_map[field] = field

    def _bin_chunk(self, chunk, fields, storage):
        raise NotImplementedError

    def _filter(self, bin_fields):
        # Accept everything initially, then restrict to values strictly
        # inside the min/max bounds of each bin field; values exactly at
        # a bound are excluded
        filter = np.ones(bin_fields[0].shape, dtype='bool')
        for (mi, ma), data in zip(self.bounds, bin_fields):
            filter &= (data > mi)
            filter &= (data < ma)
        return filter, [data[filter] for data in bin_fields]

    def _get_data(self, chunk, fields):
        # We are using chunks now, which will manage the field parameters and
        # the like.
        bin_fields = [chunk[bf] for bf in self.bin_fields]
        # We want to make sure that our fields are within the bounds of the
        # binning
        filter, bin_fields = self._filter(bin_fields)
        if not np.any(filter):
            return None
        arr = np.zeros((bin_fields[0].size, len(fields)), dtype="float64")
        for i, field in enumerate(fields):
            units = chunk.ds.field_info[field].output_units
            arr[:, i] = chunk[field][filter].in_units(units)
        if self.weight_field is not None:
            units = chunk.ds.field_info[self.weight_field].output_units
            weight_data = chunk[self.weight_field].in_units(units)
        else:
            weight_data = np.ones(filter.shape, dtype="float64")
        weight_data = weight_data[filter]
        # Return these so the binning routines in subclasses can use them
        return arr, weight_data, bin_fields

    def __getitem__(self, field):
        fname = self.field_map.get(field, None)
        if fname is None:
            if isinstance(field, tuple):
                fname = self.field_map.get(field[1], None)
            elif isinstance(field, DerivedField):
                fname = self.field_map.get(field.name[1], None)
        if fname is None:
            raise KeyError(field)
        else:
            if getattr(self, 'fractional', False):
                return self.field_data[fname]
            else:
                return self.field_data[fname].in_units(self.field_units[fname])

    def items(self):
        return [(k, self[k]) for k in self.field_data.keys()]

    def keys(self):
        return self.field_data.keys()

    def __iter__(self):
        return iter(sorted(self.items()))

    def _get_bins(self, mi, ma, n, take_log):
        if take_log:
            ret = np.logspace(np.log10(mi), np.log10(ma), n + 1)
            # at this point ret[0] and ret[-1] are not exactly equal to
            # mi and ma due to round-off error. Let's force them to be
            # mi and ma exactly to avoid incorrectly discarding cells near
            # the edges. See Issue #1300.
            ret[0], ret[-1] = mi, ma
            return ret
        else:
            return np.linspace(mi, ma, n + 1)

    def save_as_dataset(self, filename=None):
        r"""Export a profile to a reloadable yt dataset.

        This function will take a profile and output a dataset
        containing all relevant fields.  The resulting dataset
        can be reloaded as a yt dataset.

        Parameters
        ----------
        filename : str, optional
            The name of the file to be written.  If None, the name
            will be a combination of the original dataset plus
            the type of object, e.g., Profile1D.

        Returns
        -------
        filename : str
            The name of the file that has been created.

        Examples
        --------

        >>> import yt
        >>> ds = yt.load("enzo_tiny_cosmology/DD0046/DD0046")
        >>> ad = ds.all_data()
        >>> profile = yt.create_profile(ad, ["density", "temperature"],
        ...                             "cell_mass", weight_field=None,
        ...                             n_bins=(128, 128))
        >>> fn = profile.save_as_dataset()
        >>> prof_ds = yt.load(fn)
        >>> print(prof_ds.data["cell_mass"].shape)
        (128, 128)
        >>> print(prof_ds.data["x"].shape)  # x bin centers as a 1D array
        (128,)
        >>> print(prof_ds.data["density"].shape)  # x bin field as a 2D array
        (128, 128)
        >>> p = yt.PhasePlot(prof_ds.data, "density", "temperature",
        ...                  "cell_mass", weight_field=None)
        >>> p.save()

        """

        keyword = "%s_%s" % (str(self.ds), self.__class__.__name__)
        filename = get_output_filename(filename, keyword, ".h5")

        args = ("field", "log")
        extra_attrs = {
            "data_type": "yt_profile",
            "profile_dimensions": self.size,
            "weight_field": self.weight_field,
            "fractional": self.fractional,
            "accumulation": self.accumulation
        }
        data = {}
        data.update(self.field_data)
        data["weight"] = self.weight
        data["used"] = self.used.astype("float64")

        dimensionality = 0
        bin_data = []
        for ax in "xyz":
            if hasattr(self, ax):
                dimensionality += 1
                data[ax] = getattr(self, ax)
                bin_data.append(data[ax])
                bin_field_name = "%s_bins" % ax
                data[bin_field_name] = getattr(self, bin_field_name)
                extra_attrs["%s_range" % ax] = self.ds.arr(
                    [data[bin_field_name][0], data[bin_field_name][-1]])
                for arg in args:
                    key = "%s_%s" % (ax, arg)
                    extra_attrs[key] = getattr(self, key)

        bin_fields = np.meshgrid(*bin_data)
        for i, ax in enumerate("xyz"[:dimensionality]):
            data[getattr(self, "%s_field" % ax)] = bin_fields[i]

        extra_attrs["dimensionality"] = dimensionality
        ftypes = {field: "data" for field in data}
        save_as_dataset(self.ds,
                        filename,
                        data,
                        field_types=ftypes,
                        extra_attrs=extra_attrs)

        return filename