Exemplo n.º 1
0
    def __init__(self, *datasets, **kwargs):
        r"""Create a MultipleEofSolver object.

        The EOF solution is computed at initialization time. Method
        calls are used to retrieve computed quantities.

        **Arguments:**

        *\*datasets*
            One or more :py:class:`numpy.ndarray`s or
            :py:class:`numpy.ma.core.MaskedArray`s with two or more
            dimensions containing the data to be analysed. The first
            dimension of each array is assumed to represent time.
            Missing values are permitted, either in the form of masked
            arrays, or the value :py:attr:`numpy.nan`. Missing values
            must be constant with time (e.g., values of an oceanographic
            field over land).

        **Optional arguments:**

        *weights*
            A sequence of arrays of weights whose shapes are compatible
            with those of the input data sets. The weights can have the
            same shape as the input data set or a shape compatible with
            an array broadcast operation (i.e., the shape of the weights
            can match the rightmost parts of the shape of the input data
            set). Individual entries may be *None* for data sets that do
            not require weighting. If none of the input data sets
            require weighting then the single value *None* may be used.
            Defaults to *None* (no weighting for any data set).

        *center*
            If *True*, the mean along the first axis of the input data
            set (the time-mean) will be removed prior to analysis. If
            *False*, the mean along the first axis will not be removed.
            Defaults to *True* (mean is removed). Generally this option
            should be set to *True* as the covariance interpretation
            relies on input data being anomalies with a time-mean of 0.
            A valid reason for turning this off would be if you have
            already generated an anomaly data set. Setting to *True* has
            the useful side-effect of propagating missing values along
            the time-dimension, ensuring the solver will work even if
            missing values occur at different locations at different
            times.

        *ddof*
            'Delta degrees of freedom'. The divisor used to normalize
            the covariance matrix is *N - ddof* where *N* is the
            number of samples. Defaults to *1*.

        **Raises:**

        *EofError*
            If an unrecognised keyword argument is given, if no data
            sets are given, if the data sets do not all have the same
            first dimension, if the number of weights differs from the
            number of data sets, or if a weights array cannot be
            conformed to the shape of its data set.

        """
        # Keyword arguments are collected through **kwargs and validated by
        # hand, because Python 2 cannot accept named keyword arguments after
        # a variable argument list.
        defaults = {"weights": None, "center": True, "ddof": 1}
        for kwarg in kwargs:
            if kwarg not in defaults:
                raise EofError("invalid argument: %s" % kwarg)
        weights = kwargs.get("weights", defaults["weights"])
        center = kwargs.get("center", defaults["center"])
        ddof = kwargs.get("ddof", defaults["ddof"])
        # At least one dataset is needed; report that explicitly instead of
        # failing later with an IndexError.
        if not datasets:
            raise EofError("at least one dataset is required")
        # Record the number of datasets provided.
        self._ndatasets = len(datasets)
        # Initialise instance variables dealing with dataset shapes.
        self._multirecords = list()
        self._multishapes = list()
        self._multislicers = list()
        self._multichannels = list()
        self._multidtypes = list()
        slicebegin = 0
        for dataset in datasets:
            records = dataset.shape[0]
            shape = dataset.shape[1:]
            # Number of non-time points in this dataset. Stored as a plain
            # int so it is always usable as a slice bound.
            channels = int(np.prod(shape))
            # Each dataset occupies a contiguous run of columns in the
            # combined two-dimensional array built below.
            slicer = slice(slicebegin, slicebegin + channels)
            slicebegin += channels
            self._multirecords.append(records)
            self._multishapes.append(shape)
            self._multislicers.append(slicer)
            self._multichannels.append(channels)
            self._multidtypes.append(dataset.dtype)
        # Check that all fields have the same time dimension.
        nt = self._multirecords[0]
        if any(records != nt for records in self._multirecords):
            raise EofError("all datasets must have the same first dimension")
        # Get the dtype that will be used for the data and weights. This will
        # be the 'highest' dtype of those passed.
        dtype = sorted(self._multidtypes, reverse=True)[0]
        # Form a full array to pass to the EOF solver consisting of all the
        # flat inputs. _multichannels is a list, so use the built-in sum.
        ns = sum(self._multichannels)
        combined = ma.empty([nt, ns], dtype=dtype)
        for data, slicer, channels in zip(datasets,
                                          self._multislicers,
                                          self._multichannels):
            combined[:, slicer] = data.reshape([nt, channels])
        # Construct an array of weights the same shape as the data array.
        if weights is None:
            # Just pass None if none of the input datasets have associated
            # weights.
            warr = None
        else:
            if len(weights) != self._ndatasets:
                raise EofError("number of weights and datasets differs")
            if all(weight is None for weight in weights):
                # If every entry in the weights list is None then just pass
                # None to the EofSolver __init__ method.
                warr = None
            else:
                # Construct a spatial weights array.
                warr = np.empty([1, ns], dtype=dtype)
                for data, weight, slicer, channels in zip(
                        datasets,
                        weights,
                        self._multislicers,
                        self._multichannels):
                    if weight is None:
                        # If this dataset has no weights use 1 for the weight
                        # of all elements.
                        warr[:, slicer] = 1.
                    else:
                        # Otherwise use the weights. These are broadcast
                        # against a single time slice of the dataset and
                        # then flattened to match the combined array.
                        try:
                            warr[:, slicer] = np.broadcast_arrays(
                                data[0], weight)[1].reshape([channels])
                        except ValueError:
                            raise EofError("weights are invalid")
        # Create an EofSolver object to handle the computations, forwarding
        # the caller's ddof rather than a fixed value.
        self._solver = EofSolver(combined, weights=warr, center=center,
                                 ddof=ddof)
Exemplo n.º 2
0
class Eof(object):
    """EOF analysis (meta-data enabled :py:mod:`cdms2` interface)."""
    def __init__(self, dataset, weights="none", center=True, ddof=1):
        """Create an Eof object.

        **Argument:**

        *dataset*
            A :py:mod:`cdms2` variable containing the data to be
            analyzed. Time must be the first dimension. Missing values
            are allowed provided that they are constant with time (e.g.,
            values of an oceanographic field over land).

        **Optional arguments:**

        *weights*
            Sets the weighting method. The following values are
            accepted:

            * *"area"* : Square-root of grid cell area normalized by
              total area. Requires a latitude-longitude grid to be
              present in the input :py:mod:`cdms2` variable *dataset*.
              This is a fairly standard weighting strategy. If you are
              unsure which method to use and you have gridded data then
              this should be your first choice.

            * *"coslat"* : Square-root of cosine of latitude
              (*"cos_lat"* is also accepted). Requires a latitude
              dimension to be present in the input :py:mod:`cdms2`
              variable *dataset*.

            * *"none"* : Equal weights for all grid points (default).

            * *None* : Same as *"none"*.

            An array of weights may also be supplied instead of
            specifying a weighting method.

        *center*
            If *True*, the mean along the first axis of the input data
            set (the time-mean) will be removed prior to analysis. If
            *False*, the mean along the first axis will not be removed.
            Defaults to *True* (mean is removed). Generally this option
            should be set to *True* as the covariance interpretation
            relies on input data being anomalies with a time-mean of 0.
            A valid reason for turning this off would be if you have
            already generated an anomaly data set. Setting to *True* has
            the useful side-effect of propagating missing values along
            the time-dimension, ensuring the solver will work even if
            missing values occur at different locations at different
            times.

        *ddof*
            'Delta degrees of freedom'. The divisor used to normalize
            the covariance matrix is *N - ddof* where *N* is the
            number of samples. Defaults to *1*.

        **Raises:**

        *EofError*
            If *dataset* is not a :py:mod:`cdms2` variable, has no time
            axis, does not have time as its first dimension, has no
            other dimension besides time, or if the requested weighting
            scheme is rejected when generating the weights.

        **Examples:**

        EOF analysis with area-weighting for the input field:

        >>> from eof2 import Eof
        >>> eofobj = Eof(field, weights="area")

        """
        # Check that dataset is recognised by cdms2 as a variable.
        if not cdms2.isVariable(dataset):
            raise EofError("the input data must be a cdms2 variable")
        # Store the time axis as an instance variable.
        self._timeax = dataset.getTime()
        # Verify that a time axis was found, getTime returns None when a
        # time axis is not found.
        if self._timeax is None:
            raise EofError("time axis not found")
        # Check the dimension order of the input, time must be the first
        # dimension.
        order = dataset.getOrder()
        if order[0] != "t":
            raise EofError("time must be the first dimension")
        # Verify the presence of at least one spatial dimension. The
        # instance variable channels will also be used as a partial axis
        # list when constructing meta-data. It contains the spatial
        # dimensions.
        self._channels = dataset.getAxisList()
        self._channels.remove(self._timeax)
        if len(self._channels) < 1:
            raise EofError("one or more spatial dimensions are required")
        # Store the missing value attribute of the data set in an
        # instance variable so that it is recoverable later.
        self._missing_value = dataset.getMissing()
        # Generate an appropriate set of weights for the input dataset. There
        # are several weighting schemes. The "area" weighting scheme requires
        # a latitude-longitude grid to be present, the "cos_lat" scheme only
        # requires a latitude dimension.
        if weights is None:
            # No weights requested, set the weight array to None.
            wtarray = None
        else:
            # Decide whether weights names a scheme or is an array by
            # checking for a string-like lower() method. Only that call is
            # guarded, so an AttributeError raised while generating the
            # weights is not mistaken for "weights is an array". Testing
            # for a string before comparing against "none" also avoids
            # comparing an array of weights with a string.
            try:
                scheme = weights.lower()
            except AttributeError:
                # Weights is not a string, assume it is an array.
                wtarray = weights
            else:
                if weights == "none":
                    # No weights requested, set the weight array to None.
                    wtarray = None
                else:
                    try:
                        # Generate a weights array of the appropriate kind,
                        # with a shape compatible with the data set.
                        wtarray = weights_array(dataset, scheme=scheme)
                    except EofToolError as err:
                        # Weights is not recognized, raise an error.
                        raise EofError(err)
        # Cast the wtarray to the same type as the dataset. This prevents the
        # promotion of 32-bit input to 64-bit on multiplication with the
        # weight array when not required. This will fail with an
        # AttributeError exception if the weights array is None, which it may
        # be if no weighting was requested; in that case it is left as-is.
        try:
            wtarray = wtarray.astype(dataset.dtype)
        except AttributeError:
            pass
        # Create an EofSolver object using appropriate arguments for this
        # data set. The object will be used for the decomposition and
        # for returning the results.
        self._solver = EofSolver(dataset.asma(),
                                 weights=wtarray,
                                 center=center,
                                 ddof=ddof)
        self.neofs = self._solver.neofs