Пример #1
def test_nddataset_binary_operation_with_other_1D():
    """Subtracting the slice ``d[0]`` from a 2D dataset must match the raw-array result."""
    y_axis = Coord(np.linspace(0.0, 10.0, 10))
    x_axis = Coord(np.linspace(1.0, 5.5, 5))
    full = NDDataset(np.random.random((10, 5)), coordset=[y_axis, x_axis])
    first_row = full[0]

    # this should work independently of the value of the coordinates on dimension y
    difference = full - first_row

    expected = full.data - first_row.data
    assert_array_equal(difference.data, expected)
Пример #2
    # Constructor excerpt: forwards `data` and the keyword arguments to the parent
    # initializer, resets `_parent`, then normalises `coordset`, `coordunits` and
    # `coordtitles` to per-dimension lists (defaulting to `[None] * self.ndim`) and
    # wraps every non-CoordSet entry in a Coord / LinearCoord.
    # NOTE(review): this excerpt is truncated - the rest of the signature, the
    # `else:` arms, the calls that emit the two warning messages and the statement
    # that finally stores `_coordset` are not visible here. Restore them from the
    # original file before using this code.
    def __init__(self,

        super().__init__(data, **kwargs)

        self._parent = None

        # eventually set the coordinates with optional units and title

        if isinstance(coordset, CoordSet):

            # missing arguments default to one `None` placeholder per dimension
            if coordset is None:
                coordset = [None] * self.ndim

            if coordunits is None:
                coordunits = [None] * self.ndim

            if coordtitles is None:
                coordtitles = [None] * self.ndim

            _coordset = []
            # walk coordinates, units and titles in lockstep (one triple per dimension)
            for c, u, t in zip(coordset, coordunits, coordtitles):
                if not isinstance(c, CoordSet):
                    if isinstance(c, LinearCoord):
                        coord = LinearCoord(c)
                        coord = Coord(c)
                    # optional per-dimension overrides of the coordinate units / title
                    if u is not None:
                        coord.units = u
                    if t is not None:
                        coord.title = t
                    # the messages below state that units / title given for a
                    # CoordSet entry are ignored
                    if u:  # pragma: no cover
                            "units have been set for a CoordSet, but this will be ignored "
                            "(units are only defined at the coordinate level")
                    if t:  # pragma: no cover
                            "title will be ignored as they are only defined at the coordinates level"
                    coord = c


            if _coordset and set(_coordset) != {
            }:  # if they are no coordinates do nothing
Пример #3
def stack(*datasets):
    """
    Stack of |NDDataset| objects along a new dimension.

    Any number of |NDDataset| objects can be stacked. For this operation
    to be defined the following must be true :

    #. all inputs must be valid dataset objects,
    #. units of data and axis must be compatible (rescaling is applied
       automatically if necessary).

    Parameters
    ----------
    *datasets : a series of |NDDataset|
        The dataset to be stacked to the current dataset.

    Returns
    -------
    out
        A |NDDataset| created from the stack of the `datasets` datasets.

    Raises
    ------
    DimensionsCompatibilityError
        If the input datasets do not all have the same shape.

    See Also
    --------
    concatenate : Concatenate |NDDataset| objects along a given dimension.

    Examples
    --------
    >>> A = scp.read('irdata/nh4y-activation.spg', protocol='omnic')
    >>> B = scp.read('irdata/nh4y-activation.scp')
    >>> C = scp.stack(A, B)
    >>> print(C)
    NDDataset: [float64] a.u. (shape: (z:2, y:55, x:5549))
    """
    # the helper's name suggests it returns copies, so the in-place edits below
    # should not touch the caller's objects - TODO confirm against `_get_copy`
    datasets = _get_copy(datasets)

    shapes = {ds.shape for ds in datasets}
    if len(shapes) != 1:
        raise DimensionsCompatibilityError(
            "all input arrays must have the same shape")

    # prepend a new dimension
    for i, dataset in enumerate(datasets):
        dataset._data = dataset.data[np.newaxis]
        dataset._mask = dataset.mask[np.newaxis]
        newcoord = Coord([i], labels=[dataset.name])
        # pick a dimension name that the dataset does not use yet
        newcoord.name = (OrderedSet(DEFAULT_DIM_NAME) - dataset._dims).pop()
        # NOTE(review): `newcoord` is built but only its name is used below; if it
        # is meant to be attached to the dataset's coordinates, that statement is
        # missing from this block - TODO confirm against the original file.
        dataset.dims = [newcoord.name] + dataset.dims

    return concatenate(*datasets, dims=0)
Пример #4
# Looks up the method named by `operation` on a Coord, applies it to a plain
# scalar and checks that the units of the result equal `result_units`.
# NOTE(review): the `Coord(...)` call is cut off after its first argument in this
# excerpt, and the parametrisation that supplies (operation, result_units) is not
# visible - restore both before running.
def test_coord_unit_conversion_operators(operation, result_units):
    in_km = Coord(data=np.linspace(4000, 1000, 10),

    scalar = 2.

    # fetch the bound operator method by name
    operator_km = in_km.__getattribute__(operation)

    combined = operator_km(scalar)
    debug_(f'{operation}, {combined}')
    assert_equal_units(combined.units, result_units)
Пример #5
    # Item access: a string key is first tried as a coordinate lookup in
    # `self._coordset`; otherwise the parent class performs the slicing and the
    # coordinates of the result are rebuilt dimension by dimension from the
    # normalised index returned by the parent.
    # Returns None when the parent slicing returns None, else the new object with
    # a "Slice extracted" history entry.
    # NOTE(review): this excerpt is truncated - the `try:` line, the body of the
    # `except` clause, several `else:` lines and the body of the inner
    # `for c in ...` loop are missing. Restore them from the original file.
    def __getitem__(self, items, **kwargs):

        # kept untouched for the history message written at the end
        saveditems = items

        # coordinate selection to test first
        if isinstance(items, str):
                return self._coordset[items]
            except Exception:

        # slicing
        new, items = super().__getitem__(items, return_index=True)

        if new is None:
            return None

        if self._coordset is not None:
            names = self._coordset.names  # all names of the current coordinates
            new_coords = [None] * len(names)
            for i, item in enumerate(items):
                # get the corresponding dimension name in the dims list
                name = self.dims[i]
                # get the corresponding index in the coordinate's names list
                idx = names.index(name)
                if self._coordset[idx].is_empty:
                    new_coords[idx] = Coord(None, name=name)
                elif isinstance(item, slice):
                    # add the slice on the corresponding coordinates on the dim to the new list of coordinates
                    if not isinstance(self._coordset[idx], CoordSet):
                        new_coords[idx] = self._coordset[idx][item]
                        # we must slice all internal coordinates
                        newc = []
                        for c in self._coordset[idx]:
                        new_coords[idx] = CoordSet(*newc[::-1], name=name)
                        # we reverse to be sure
                        # the order will be  kept for internal coordinates
                        new_coords[idx]._default = self._coordset[
                            idx]._default  # set the same default coord
                        new_coords[idx]._is_same_dim = self._coordset[

                elif isinstance(item, (np.ndarray, list)):
                    new_coords[idx] = self._coordset[idx][item]

            new.set_coordset(*new_coords, keepnames=True)

        new.history = f"Slice extracted: ({saveditems})"
        return new
Пример #6
# Same check as the scalar variant, but the right-hand operand is a quantity
# (`2. * ur.m`): the method named by `operation` is applied to it and the units of
# the result must equal `result_units`.
# NOTE(review): the `Coord(...)` call is cut off after its first argument in this
# excerpt and the parametrisation is not visible - restore before running.
def test_coord_unit_conversion_operators_a(operation, result_units):
    print(operation, result_units)
    in_km = Coord(data=np.linspace(4000, 1000, 10),

    scalar_in_m = 2. * ur.m

    # fetch the bound operator method by name
    operator_km = in_km.__getattribute__(operation)

    combined = operator_km(scalar_in_m)

    assert_equal_units(combined.units, result_units)
Пример #7
def test_nddataset_add_mismatch_coords():
    """In-place arithmetic between datasets whose coordinates are swapped must fail."""
    first = Coord(np.arange(5.0))
    second = Coord(np.arange(1.0, 5.5, 1.0))
    left = NDDataset(np.ones((5, 5)), coordset=[first, second])
    right = NDDataset(np.ones((5, 5)), coordset=[second, first])

    # both operators are expected to report the same mismatch message
    expected_prefix = "\nCoord.data attributes are not almost equal"

    with pytest.raises(CoordinateMismatchError) as caught:
        left -= right
    assert str(caught.value).startswith(expected_prefix)

    with pytest.raises(CoordinateMismatchError) as caught:
        left += right
    # TODO= make more tests like this for various functions
    assert str(caught.value).startswith(expected_prefix)
Пример #8
def test_coord_add_units_with_different_scale():
    """Sums of metre and centimetre coordinates are expressed in the left operand's unit."""
    in_m = Coord.arange(3.0, units="m")
    in_cm = Coord.arange(3.0, units="cm")

    # coordinate + quantity
    total = in_m + 1.0 * ur.cm
    assert total.data[1] == 1.01

    # coordinate + coordinate, in both orders
    total = in_m + in_cm
    assert total.data[1] == 1.01
    total = in_cm + in_m
    assert total.data[1] == 101.0

    # in-place variants: the second one sees the already updated `in_m`,
    # so the statement order must be kept
    in_m += in_cm
    assert in_m.data[1] == 1.01
    in_cm += in_m
    assert in_cm.data[1] == 102.0
Пример #9
# Calling the Coord method called `name` without arguments must raise
# NotImplementedError.
# NOTE(review): the `Coord(...)` call is cut off after its first argument in this
# excerpt and the parametrisation supplying `name` is not visible.
def test_coord_not_implemented(name):
    coord0 = Coord(data=np.linspace(4000, 1000, 10),
    with pytest.raises(NotImplementedError):
        getattr(coord0, name)()
Пример #10
# Smoke test for the IRIS analysis: reads an OMNIC file, replaces the `y`
# coordinate with a list of pressures, runs IRIS on a slice of the `x` axis,
# reconstructs the data and plots the first element of the result `f`.
# NOTE(review): the closing `]` of the pressure list and the closing `}` of the
# `param` dict are missing from this excerpt - restore before running.
def test_IRIS():
    X = NDDataset.read_omnic(os.path.join('irdata', 'CO@Mo_Al2O3.SPG'))

    # 19 pressure values, declared in torr by the Coord built below
    p = [
        0.00300, 0.00400, 0.00900, 0.01400, 0.02100, 0.02600, 0.03600, 0.05100,
        0.09300, 0.15000, 0.20300, 0.30000, 0.40400, 0.50300, 0.60200, 0.70200,
        0.80100, 0.90500, 1.00400

    X.coordset.update(y=Coord(p, title='pressure', units='torr'))
    # Using the `update` method is mandatory because it will preserve the name.
    # Indeed, setting using X.coordset[0] = Coord(...) fails unless name is specified: Coord(..., name='y')

    # set the optimization parameters, perform the analysis
    # and plot the results

    param = {
        'epsRange': [-8, -1, 20],
        'lambdaRange': [-7, -5, 3],
        'kernel': 'langmuir'

    # restrict the analysis to the 2250-1950 range of the x coordinate
    X_ = X[:, 2250.:1950.]

    iris = IRIS(X_, param, verbose=True)

    f = iris.f
    X_hat = iris.reconstruct()

    f[0].plot(method='map', plottitle=True)

Пример #11
# Exercises Coord / LinearCoord construction: equality between coordinates built
# from equivalent inputs, in-place addition, switching a copy to linear mode,
# construction from offset/increment/size, and `LinearCoord.linspace` with copies.
# NOTE(review): the three `LinearCoord.linspace(...)` calls are cut off in this
# excerpt (their remaining arguments and closing parentheses are missing).
def test_linearcoord():
    coord1 = Coord([1, 2.5, 4, 5])

    coord2 = Coord(np.array([1, 2.5, 4, 5]))
    assert coord2 == coord1

    coord3 = Coord(range(10))

    coord4 = Coord(np.arange(10))
    assert coord4 == coord3

    # work on a copy so that coord4 stays available as the reference
    coord5 = coord4.copy()
    coord5 += 1
    assert np.all(coord5.data == coord4.data + 1)

    assert coord5 is not None
    coord5.linear = True

    # offset 2, increment 2, size 10 -> 2, 4, ..., 20, i.e. (arange(10) + 1) * 2
    coord6 = Coord(linear=True, offset=2.0, increment=2.0, size=10)
    assert np.all(coord6.data == (coord4.data + 1.0) * 2.)

    LinearCoord(offset=2.0, increment=2.0, size=10)

    coord0 = LinearCoord.linspace(200.,
                                  labels=['cold', 'normal', 'hot'],
    coord1 = LinearCoord.linspace(0.,
    coord2 = LinearCoord.linspace(4000.,

    assert coord0.size == 3
    assert coord1.size == 100
    assert coord2.size == 100

    # a copy must compare equal to its source
    coordc = coord0.copy()
    assert coord0 == coordc

    coordc = coord1.copy()
    assert coord1 == coordc
Пример #12
# Applies the numpy function called `name` to a Coord and checks that the result
# is still a Coord.
# NOTE(review): the `Coord(...)` call is cut off after its first argument in this
# excerpt and the parametrisation supplying `name` is not visible.
def test_coord_unary_ufuncs_simple_data(name):
    coord0 = Coord(data=np.linspace(4000, 1000, 10),

    # look the function up on the numpy module by name
    f = getattr(np, name)
    r = f(coord0)
    assert isinstance(r, Coord)
Пример #13
# Checks Coord slicing by integer index, by label and by label range, then the
# behaviour of a coordinate that only carries labels.
# NOTE(review): the three `Coord(...)` calls that build `coord0` / `coord1` are
# cut off in this excerpt (remaining arguments and closing parentheses missing),
# so the units and labels the assertions rely on are not visible here.
def test_coord_slicing():
    # slicing by index

    coord0 = Coord(data=np.linspace(4000, 1000, 10),

    assert coord0[0] == 4000.0

    coord1 = Coord(data=np.linspace(4000, 1000, 10),
    c1 = coord1[0]
    assert isinstance(c1.values, Quantity)
    assert coord1[0].values == 4000.0 * (1.0 / ur.cm)

    # slicing with labels

    labs = list("abcdefghij")

    coord0 = Coord(
        data=np.linspace(4000, 1000, 10),

    assert coord0[0].values == 4000.0 * (1.0 / ur.cm)
    assert isinstance(coord0[0].values, Quantity)

    assert coord0[2] == coord0["c"]
    assert coord0["c":"d"] == coord0[2:4]  # label included

    # slicing only-labels coordinates

    y = list("abcdefghij")
    a = Coord(labels=y, name="x")
    assert a.name == "x"
    assert isinstance(a.labels, np.ndarray)
    # with no data, `values` is expected to be the labels
    assert_array_equal(a.values, a.labels)
Пример #14
 # Returns a copy of `self.s` renamed 'sv' and titled 'singular values'.
 # NOTE(review): the statement that uses the `labels=[...]` argument below is cut
 # off in this excerpt (only its middle line survived) - restore it from the
 # original file; presumably it labels the components '#1', '#2', ... - TODO confirm.
 def sv(self):
     """|NDDataset|, Singular values"""
     size = self.s.size
     sv = self.s.copy()
     sv.name = 'sv'
     sv.title = 'singular values'
               labels=['#%d' % (i + 1) for i in range(size)],
     return sv
Пример #15
 # Returns `self.s**2 / (size - 1)` (with `size = self.s.size`), renamed 'ev' and
 # titled 'explained variance'.
 # NOTE(review): the statement that uses the `labels=[...]` argument below is cut
 # off in this excerpt (only its middle line survived) - restore it from the
 # original file.
 def ev(self):
     """|NDDataset|, Explained variance"""
     size = self.s.size
     ev = self.s**2 / (size - 1)
     ev.name = 'ev'
     ev.title = 'explained variance'
               labels=['#%d' % (i + 1) for i in range(size)],
     return ev
Пример #16
# Checks Coord slicing by integer index, by label and by label range, then the
# behaviour of a coordinate that only carries labels.
# NOTE(review): the three `Coord(...)` calls that build `coord0` / `coord1` are
# cut off in this excerpt (remaining arguments and closing parentheses missing),
# so the units and labels the assertions rely on are not visible here.
def test_coord_slicing():
    # slicing by index

    coord0 = Coord(data=np.linspace(4000, 1000, 10),

    assert coord0[0] == 4000.0

    coord1 = Coord(data=np.linspace(4000, 1000, 10),
    c1 = coord1[0]
    assert isinstance(c1.values, Quantity)
    assert coord1[0].values == 4000.0 * (1. / ur.cm)

    # slicing with labels

    labs = list('abcdefghij')

    coord0 = Coord(data=np.linspace(4000, 1000, 10),

    assert coord0[0].values == 4000.0 * (1. / ur.cm)
    assert isinstance(coord0[0].values, Quantity)

    assert coord0[2] == coord0['c']
    assert coord0['c':'d'] == coord0[2:4]  # label included

    # slicing only-labels coordinates

    y = list('abcdefghij')
    a = Coord(labels=y, name='x')
    assert a.name == 'x'
    assert isinstance(a.labels, np.ndarray)
    # with no data, `values` is expected to be the labels
    assert_array_equal(a.values, a.labels)
Пример #17
 # Returns `self.s**2 / (size - 1)` (with `size = self.s.size`), renamed "ev" and
 # titled "explained variance".
 # NOTE(review): the docstring quotes around the first body line are missing and
 # the statement that uses the `labels=[...]` argument is cut off in this excerpt
 # (only its middle line survived) - restore both from the original file.
 def ev(self):
     Explained variance (|NDDataset|).
     size = self.s.size
     ev = self.s**2 / (size - 1)
     ev.name = "ev"
     ev.title = "explained variance"
               labels=[f"#{(i + 1)}" for i in range(size)],
     return ev
Пример #18
 # Returns a copy of `self.s` renamed "sv" and titled "singular values".
 # NOTE(review): the docstring quotes around the first body line are missing and
 # the statement that uses the `labels=[...]` argument is cut off in this excerpt
 # (only its middle line survived) - restore both from the original file.
 def sv(self):
     Singular values (|NDDataset|).
     size = self.s.size
     sv = self.s.copy()
     sv.name = "sv"
     sv.title = "singular values"
               labels=[f"#{(i + 1)}" for i in range(size)],
     return sv
Пример #19
    def update(self, **kwargs):
        """
        Update specific coordinates in the CoordSet.

        Parameters
        ----------
        **kwargs
            Only keywords among the CoordSet.names are allowed - they denote
            the name of a dimension. Each matching value is wrapped in a new
            Coord named after that dimension and replaces the coordinate stored
            at the corresponding position. Keywords that do not match any name
            are left untouched and silently ignored.
        """
        # iterate over a snapshot of the keys: matched entries are popped from
        # `kwargs` inside the loop
        for dim in list(kwargs):
            if dim in self.names:
                # we can replace the given coordinates
                idx = self.names.index(dim)
                self[idx] = Coord(kwargs.pop(dim), name=dim)
Пример #20
# Fills in the metadata of a dataset read from an OMNIC-exported csv file: units,
# title, description, history, origin and dates. When the dataset name contains an
# underscore, the part after it is parsed as an acquisition date and stored as a
# one-point `y` coordinate (timestamp, in seconds) labelled with the date and the
# name. Returns the same dataset object.
# NOTE(review): the `dataset.set_coordtitles(...)` call is cut off in this excerpt
# (remaining arguments and closing parenthesis missing) - restore before running.
def _add_omnic_info(dataset, **kwargs):
    # get the time and name
    name = desc = dataset.name

    # modify the dataset metadata
    dataset.units = 'absorbance'
    dataset.title = 'absorbance'
    dataset.name = name
    dataset.description = ('Dataset from .csv file: {}\n'.format(desc))
    dataset.history = str(datetime.now(
        timezone.utc)) + ':read from omnic exported csv file \n'
    dataset.origin = 'omnic'

    # Set the NDDataset date
    dataset._date = datetime.now(timezone.utc)
    dataset._modified = dataset.date

    # x axis
    dataset.x.units = 'cm^-1'

    # y axis ?
    if '_' in name:
        # NOTE(review): this unpacking raises ValueError if the name holds more
        # than one underscore
        name, dat = name.split('_')
        # if needed convert weekday name to English
        dat = dat.replace('Lun', 'Mon')
        # 'Mar' is only replaced in the first three characters; presumably so that
        # the month abbreviation 'Mar' is left alone - TODO confirm
        dat = dat[:3].replace('Mar', 'Tue') + dat[3:]
        dat = dat.replace('Mer', 'Wed')
        dat = dat.replace('Jeu', 'Thu')
        dat = dat.replace('Ven', 'Fri')
        dat = dat.replace('Sam', 'Sat')
        dat = dat.replace('Dim', 'Sun')
        # convert month name to English
        dat = dat.replace('Aout', 'Aug')

        # get the dates
        acqdate = datetime.strptime(dat, "%a %b %d %H-%M-%S %Y")

        # Transform back to timestamp for storage in the Coord object
        # use datetime.fromtimestamp(d, timezone.utc))
        # to transform back to datetime object
        # NOTE(review): `acqdate` is a naive datetime, so `timestamp()` interprets
        # it in the local timezone, while the coordinate title says GMT - confirm
        timestamp = acqdate.timestamp()

        dataset.y = Coord(np.array([timestamp]), name='y')
        dataset.set_coordtitles(y='acquisition timestamp (GMT)',
        dataset.y.labels = np.array([[acqdate], [name]])
        dataset.y.units = 's'

    return dataset
Пример #21
# Fills in the metadata of a dataset read from an OMNIC-exported csv file: units,
# title, description, history, origin and dates. When the dataset name contains an
# underscore, the part after it is parsed as an acquisition date and stored as a
# one-point `y` coordinate (timestamp, in seconds) labelled with the date and the
# name. Returns the same dataset object.
# NOTE(review): the `dataset.set_coordtitles(...)` call is cut off in this excerpt
# (remaining arguments and closing parenthesis missing) - restore before running.
def _add_omnic_info(dataset, **kwargs):
    # get the time and name
    name = desc = dataset.name

    # modify the dataset metadata
    dataset.units = "absorbance"
    dataset.title = "absorbance"
    dataset.name = name
    dataset.description = "Dataset from .csv file: {}\n".format(desc)
    dataset.history = (str(datetime.now(timezone.utc)) +
                       ":read from omnic exported csv file \n")
    dataset.origin = "omnic"

    # Set the NDDataset date
    dataset._date = datetime.now(timezone.utc)
    dataset._modified = dataset.date

    # x axis
    dataset.x.units = "cm^-1"

    # y axis ?
    if "_" in name:
        # NOTE(review): this unpacking raises ValueError if the name holds more
        # than one underscore
        name, dat = name.split("_")
        # if needed convert weekday name to English
        dat = dat.replace("Lun", "Mon")
        # "Mar" is only replaced in the first three characters; presumably so that
        # the month abbreviation "Mar" is left alone - TODO confirm
        dat = dat[:3].replace("Mar", "Tue") + dat[3:]
        dat = dat.replace("Mer", "Wed")
        dat = dat.replace("Jeu", "Thu")
        dat = dat.replace("Ven", "Fri")
        dat = dat.replace("Sam", "Sat")
        dat = dat.replace("Dim", "Sun")
        # convert month name to English
        dat = dat.replace("Aout", "Aug")

        # get the dates
        acqdate = datetime.strptime(dat, "%a %b %d %H-%M-%S %Y")

        # Transform back to timestamp for storage in the Coord object
        # use datetime.fromtimestamp(d, timezone.utc))
        # to transform back to datetime object
        # NOTE(review): `acqdate` is a naive datetime, so `timestamp()` interprets
        # it in the local timezone, while the coordinate title says GMT - confirm
        timestamp = acqdate.timestamp()

        dataset.y = Coord(np.array([timestamp]), name="y")
        dataset.set_coordtitles(y="acquisition timestamp (GMT)",
        dataset.y.labels = np.array([[acqdate], [name]])
        dataset.y.units = "s"

    return dataset
Пример #22
# Builds a concentration NDDataset from the given profiles: the collected rows
# are stacked, shifted so that the global minimum is 0, normalised so that every
# column sums to 1, and wrapped in an NDDataset whose second coordinate is a
# 50-point time axis (0 to 10, in hours).
# NOTE(review): the body of the `for p in profiles:` loop (the statement that
# fills `c`) and part of the NDDataset arguments are missing from this excerpt.
def _make_concentrations_matrix(*profiles):
    # imported locally; presumably to avoid import cycles or import cost - TODO confirm
    from spectrochempy.core.dataset.coord import Coord
    from spectrochempy.core.dataset.nddataset import NDDataset

    t = Coord(np.linspace(0, 10, 50), units='hour', title='time')
    c = []
    for p in profiles:
    ct = np.vstack(c)
    # shift to non-negative values, then normalise each column to a unit sum
    ct = ct - ct.min()
    ct = ct / np.sum(ct, axis=0)
    ct = NDDataset(data=ct,
                   coordset=[range(len(ct)), t])

    return ct
Пример #23
# Builds a spectra NDDataset with one row per (ampl, width, pos) triple: each row
# is `gaussianmodel().f` evaluated on a 4000-point axis running from 6000 to 1000.
# NOTE(review): the `Coord(...)` call that builds `x` and part of the NDDataset
# arguments are cut off in this excerpt - restore before running.
def _make_spectra_matrix(pos, width, ampl):
    # imported locally; presumably to avoid import cycles or import cost - TODO confirm
    from spectrochempy.core.dataset.coord import Coord
    from spectrochempy.core.dataset.nddataset import NDDataset
    from spectrochempy.core.fitting.models import gaussianmodel

    x = Coord(np.linspace(6000.0, 1000.0, 4000),
    s = []
    # note the argument order handed to the model: amplitude, width, position
    for args in zip(ampl, width, pos):
        s.append(gaussianmodel().f(x.data, *args))

    st = np.vstack(s)
    st = NDDataset(data=st,
                   coordset=[range(len(st)), x])

    return st
Пример #24
    # Builds an (M, n_pc) "relative concentration" dataset from `self.f_ev` and
    # `self.b_ev`: row i is the element-wise minimum of the first n_pc values of
    # f_ev[i] and the reversed first n_pc values of b_ev[i].
    # `n_pc` is capped at K, the second dimension of `self.f_ev`. The text below
    # says "default:3", but the code uses K when `n_pc` is None.
    # NOTE(review): this excerpt is truncated - the docstring quotes and section
    # headers, the closing parenthesis of the `NDDataset(...)` call and the
    # `else:` lines of the two if-statements are missing. Restore them from the
    # original file.
    def get_conc(self, n_pc=None):
        Computes abstract concentration profile (first in - first out).

        n_pc : int, optional, default:3
            Number of pure species for which the concentration profile must be

            Concentration profile.
        M, K = self.f_ev.shape
        if n_pc is None:
            n_pc = K
        n_pc = min(K, n_pc)

        f = self.f_ev
        b = self.b_ev

        xcoord = Coord(range(n_pc), title="PS#")
        c = NDDataset(
            np.zeros((M, n_pc)),
            coordset=CoordSet(y=self._X.y, x=xcoord),
            title="relative concentration",
            description="Concentration profile from EFA",
            history=f"{datetime.now(timezone.utc)}: created by spectrochempy",
        # a row counts as masked when all of its elements are masked
        if self._X.is_masked:
            masked_rows = np.all(self._X.mask, axis=-1)
            masked_rows = np.array([False] * M)

        for i in range(M):
            if masked_rows[i]:
                c[i] = MASKED
            c[i] = np.min((f.data[i, :n_pc], b.data[i, :n_pc][::-1]), axis=0)
        return c
Пример #25
    # Validates a collection of coordinates before it is attached to this object:
    # bare NDArray entries are converted to Coord, and for an NDDataset the size
    # of each coordinate named after one of `self.dims` must match the size of
    # that data dimension. On success `coords._parent` is set to `self` and
    # `coords` is returned.
    # Raises TypeError for an unsupported coordinate type and ValueError for a
    # size mismatch.
    # NOTE(review): this excerpt is truncated - the bodies of the first two
    # `if` statements, the `else:` lines and the closing parentheses of both
    # `raise` statements are missing. Restore them from the original file.
    def _valid_coordset(self, coords):
        # used in coords_validate and setattr
        if coords is None:

        for k, coord in enumerate(coords):

            if (coord is not None and not isinstance(coord, CoordSet)
                    and coord.data is None):

            # For coord to be acceptable, we require at least a NDArray, a NDArray subclass or a CoordSet
            if not isinstance(coord, (LinearCoord, Coord, CoordSet)):
                if isinstance(coord, NDArray):
                    # convert in place so that `coords` holds the Coord as well
                    coord = coords[k] = Coord(coord)
                    raise TypeError(
                        "Coordinates must be an instance or a subclass of Coord class or NDArray, or of "
                        f" CoordSet class, but an instance of {type(coord)} has been passed"

            if self.dims and coord.name in self.dims:
                # check the validity of the given coordinates in terms of size (if it correspond to one of the dims)
                size = coord.size

                if self.implements("NDDataset"):
                    idx = self._get_dims_index(
                        coord.name)[0]  # idx in self.dims
                    if size != self._data.shape[idx]:
                        raise ValueError(
                            f"the size of a coordinates array must be None or be equal"
                            f" to that of the respective `{coord.name}`"
                            f" data dimension but coordinate size={size} != data shape[{idx}]="
                    pass  # bypass this checking for any other derived type (should be done in the subclass)

        coords._parent = self
        return coords
Пример #26
    # Attribute assignment hook: when `key` is one of the default dimension names
    # and is one of `self.dims`, the value is stored as the coordinate of that
    # dimension (a list of Coord, a CoordSet, a Coord / LinearCoord, or anything
    # else wrapped in a Coord) and the coordinate set is re-validated. A dimension
    # name that is not in `self.dims` leads to an AttributeError. Other keys are
    # passed to the parent `__setattr__`.
    # NOTE(review): this excerpt is truncated - the statement creating the
    # coordset, the end of the `list(...)` call and several `else:` lines are
    # missing, so the exact branch structure cannot be read from here. Restore
    # them from the original file.
    def __setattr__(self, key, value):

        if key in DEFAULT_DIM_NAME:  # syntax such as ds.x, ds.y, etc...
            # Note the above test is important to avoid errors with traitlets
            # even if it looks redundant with the following
            if key in self.dims:
                if self._coordset is None:
                    # we need to create a coordset first
                        dict((self.dims[i], None) for i in range(self.ndim)))
                idx = self._coordset.names.index(key)
                _coordset = self._coordset
                # True only when `value` is a list made exclusively of Coord objects
                listcoord = False
                if isinstance(value, list):
                    listcoord = all(
                        [isinstance(item, Coord) for item in value])
                if listcoord:
                    _coordset[idx] = list(
                    _coordset[idx].name = key
                    _coordset[idx]._is_same_dim = True
                elif isinstance(value, CoordSet):
                    if len(value) > 1:
                        value = CoordSet(value)
                    _coordset[idx] = list(value.to_dict().values())[0]
                    _coordset[idx].name = key
                    _coordset[idx]._is_same_dim = True
                elif isinstance(value, (Coord, LinearCoord)):
                    value.name = key
                    _coordset[idx] = value
                    _coordset[idx] = Coord(value, name=key)
                _coordset = self._valid_coordset(_coordset)
                raise AttributeError(f"Coordinate `{key}` is not used.")
            super().__setattr__(key, value)
Пример #27
# Reads a LabSpec text export into `dataset`: the leading `#key=value` lines are
# collected into a Meta object, the remaining tab-separated block is parsed with
# numpy, and the dataset receives its data, `x` (Raman shift) and `y` (time)
# coordinates, title, name, metadata, description, date and history.
# `args` must be `(dataset, filename)`; `filename` needs a `.stem` attribute.
# Returns the populated dataset.
# NOTE(review): this excerpt is truncated - the `else:` / `try:` lines around the
# file reading, the bodies of the empty-file check and of the LabSpec key checks,
# and the `else:` before the 2D branch are missing. Restore them from the
# original file.
# NOTE(review): no `close()` is visible for the file handles opened below;
# consider a `with` block once the full code is restored.
def _read_txt(*args, **kwargs):
    # read Labspec *txt files or series

    dataset, filename = args
    content = kwargs.get("content", False)

    if content:
        # fid = io.StringIO(content)
        # TODO: get the l list of string

        fid = open(filename, "r", encoding="utf-8")
            lines = fid.readlines()
        except UnicodeDecodeError:
            # fall back to latin-1 when the file is not valid utf-8
            fid = open(filename, "r", encoding="latin-1")
            lines = fid.readlines()

    if len(lines) == 0:

    # Metadata
    meta = Meta()

    # consume the leading header lines, which all start with '#'
    # NOTE(review): a header line with zero or several '=' makes the unpacking
    # below raise ValueError, and a file made only of '#' lines runs past the end
    i = 0
    while lines[i].startswith("#"):
        key, val = lines[i].split("=")
        # drop the leading '#'
        key = key[1:]
        # a repeated key is made unique by appending the line index
        if key in meta.keys():
            key = f"{key} {i}"
        meta[key] = val.strip()
        i += 1

    # .txt extension is fairly common. We determine non labspc files based
    # on the absence of few keys. Two types of files (1D or 2D) are considered:
    labspec_keys_1D = ["Acq. time (s)", "Dark correction"]
    labspec_keys_2D = ["Exposition", "Grating"]

    if all(keywd in meta.keys() for keywd in labspec_keys_1D):
    elif all(keywd in meta.keys() for keywd in labspec_keys_2D):
        # this is not a labspec txt file"

    # read spec
    rawdata = np.genfromtxt(lines[i:], delimiter="\t")

    # populate the dataset
    # two columns: a single spectrum (x in column 0, counts in column 1)
    if rawdata.shape[1] == 2:
        data = rawdata[:, 1][np.newaxis]
        _x = Coord(rawdata[:, 0], title="Raman shift", units="1/cm")
        _y = Coord(None, title="Time", units="s")
        date_acq, _y = _transf_meta(_y, meta)

        # otherwise: first row holds the x values, first column the time values
        data = rawdata[1:, 1:]
        _x = Coord(rawdata[0, 1:], title="Raman shift", units="1/cm")
        _y = Coord(rawdata[1:, 0], title="Time", units="s")
        date_acq, _y = _transf_meta(_y, meta)

    # try to transform to linear coord
    _x.linear = True

    # if success linear should still be True
    if _x.linear:
        _x = LinearCoord(_x)

    # set dataset metadata
    dataset.data = data
    dataset.set_coordset(y=_y, x=_x)
    dataset.title = "Counts"
    dataset.units = None
    dataset.name = filename.stem
    dataset.meta = meta

    # date_acq is Acquisition date at start (first moment of acquisition)
    dataset.description = "Spectrum acquisition : " + str(date_acq)

    # Set the NDDataset date
    dataset._date = datetime.datetime.now(datetime.timezone.utc)
    dataset._modified = dataset.date

    # Set origin, description and history
    dataset.history = f"{dataset.date}:imported from LabSpec6 text file {filename}"

    return dataset
Пример #28
    # Constructor of a PCA-style analysis: optionally centres, standardises and
    # rescales a copy of `dataset`, runs an SVD on the result, stores the loadings
    # (`_LT`, taken from `svd.VT`) and the scores (`_S`, computed as
    # `dot(U, sigma)`), and keeps the singular values, explained variance, its
    # ratio and its cumulative sum, each with its `x` title set to 'PC #'.
    # NOTE(review): this excerpt is truncated - the docstring quotes and section
    # headers are missing and the call that uses the `x=Coord(...)` argument below
    # is cut off (only its last line survived). Restore them from the original file.
    def __init__(self, dataset, centered=True, standardized=False, scaled=False):
        dataset : |NDDataset| object
            The input dataset has shape (M, N). M is the number of
            observations (for examples a series of IR spectra) while N
            is the number of features (for example the wavenumbers measured
            in each IR spectrum).
        centered : bool, optional, default:True
            If True the data are centered around the mean values: :math:`X' = X - mean(X)`.
        standardized : bool, optional, default:False
            If True the data are scaled to unit standard deviation: :math:`X' = X / \\sigma`.
        scaled : bool, optional, default:False
            If True the data are scaled in the interval [0-1]: :math:`X' = (X - min(X)) / (max(X)-min(X))`
        self.prefs = dataset.preferences

        self._X = X = dataset

        # work on a copy so that the in-place operations below do not touch X
        Xsc = X.copy()

        # mean center the dataset
        # -----------------------
        self._centered = centered
        if centered:
            self._center = center = np.mean(X, axis=0)
            Xsc = X - center
            Xsc.title = "centered %s" % X.title

        # Standardization
        # ---------------
        self._standardized = standardized
        if standardized:
            self._std = np.std(Xsc, axis=0)
            Xsc /= self._std
            Xsc.title = "standardized %s" % Xsc.title

        # Scaling
        # -------
        self._scaled = scaled
        if scaled:
            self._min = np.min(Xsc, axis=0)
            self._ampl = np.ptp(Xsc, axis=0)
            Xsc -= self._min
            Xsc /= self._ampl
            Xsc.title = "scaled %s" % Xsc.title

        self._Xscaled = Xsc

        # perform SVD
        # -----------
        svd = SVD(Xsc)
        sigma = svd.s.diag()
        U = svd.U
        VT = svd.VT

        # select n_pc loadings & compute scores
        # --------------------------------------------------------------------

        # loadings

        # NOTE(review): `LT` is the same object as `svd.VT`, so the title and
        # history set below are written onto that object as well
        LT = VT
        LT.title = 'loadings (L^T) of ' + X.name
        LT.history = 'Created by PCA'

        # scores

        S = dot(U, sigma)
        S.title = 'scores (S) of ' + X.name
                       x=Coord(None, labels=['#%d' % (i + 1) for i in range(svd.s.size)], title='principal component'))

        S.description = 'scores (S) of ' + X.name
        S.history = 'Created by PCA'

        self._LT = LT
        self._S = S

        # other attributes
        # ----------------

        self._sv = svd.sv
        self._sv.x.title = 'PC #'

        self._ev = svd.ev
        self._ev.x.title = 'PC #'

        self._ev_ratio = svd.ev_ratio
        self._ev_ratio.x.title = 'PC #'

        self._ev_cum = svd.ev_cum
        self._ev_cum.x.title = 'PC #'

Пример #29
def align(dataset, *others, **kwargs):
    Align individual |NDDataset| along given dimensions using various methods.

    dataset : |NDDataset|
        Dataset on which we want to salign other objects.
    *others : |NDDataset|
        Objects to align.
    dim : str. Optional, default='x'
        Along which axis to perform the alignment.
    dims : list of str, optional, default=None
        Align along all dims defined in dims (if dim is also
        defined, then dims have higher priority).
    method : enum ['outer', 'inner', 'first', 'last', 'interpolate'], optional, default='outer'
        Which method to use for the alignment.

        If align is defined :

        * 'outer' means that a union of the different coordinates is
        achieved (missing values are masked)
        * 'inner' means that the intersection of the coordinates is used
        * 'first' means that the first dataset is used as reference
        * 'last' means that the last dataset is used as reference
        * 'interpolate' means that interpolation is performed relative to
        the first dataset.
    interpolate_method : enum ['linear','pchip']. Optional, default='linear'
        Method of interpolation to performs for the alignment.
    interpolate_sampling : 'auto', int or float. Optional, default='auto'

        * 'auto' : sampling is determined automatically from the existing data.
        * int :  if an integer values is specified, then the
          sampling interval for the interpolated data will be splitted in
          this number of points.
        * float : If a float value is provided, it determines the interval
        between the interpolated data.
    coord : |Coord|, optional, default=None
        coordinates to use for alignment. Ignore those corresponding to the
        dimensions to align.
    copy : bool, optional, default=True
        If False then the returned objects will share memory with the
        original objects, whenever it is possible :
        in principle only if reindexing is not necessary.

    aligned_datasets : tuple of |NDDataset|
        Same objects as datasets with dimensions aligned.

        issued when the dimensions given in `dim` or `dims` argument are not
        compatibles (units, titles, etc...).
    # There is probably better methods, but to simplify dealing with
    # LinearCoord, we transform them in Coord before treatment (going back
    # to linear if possible at the end of the process)

    # TODO: Perform an alignment along numeric labels
    # TODO: add example in docs

    # copy objects?
    copy = kwargs.pop('copy', True)

    # make a single list with dataset and the remaining object
    objects = [dataset] + list(others)

    # should we align on given external coordinates
    extern_coord = kwargs.pop('coord', None)
    if extern_coord and extern_coord.implements('LinearCoord'):
        extern_coord = Coord(extern_coord, linear=False, copy=True)

    # what's the method to use (by default='outer')
    method = kwargs.pop('method', 'outer')

    # trivial cases where alignment is not possible or unecessary
    if not objects:
        warning_('No object provided for alignment!')
        return None

    if len(objects) == 1 and objects[0].implements(
            'NDDataset') and extern_coord is None:
        # no necessary alignment
        return objects

    # evaluate on which axis we align
    axis, dims = dataset.get_axis(only_first=False, **kwargs)

    # check compatibility of the dims and prepare the dimension for alignment
    for axis, dim in zip(axis, dims):

        # get all objets to align
        _objects = {}
        _nobj = 0

        for idx, object in enumerate(objects):

            if not object.implements('NDDataset'):
                    f'Bad object(s) found: {object}. Note that only NDDataset '
                    f'objects are accepted '
                    f'for alignment')
                return None

            _objects[_nobj] = {
                'obj': object.copy(),
                'idx': idx,
            _nobj += 1

        _last = _nobj - 1

        # get the reference object (by default the first, except if method if
        # set to 'last'
        ref_obj_index = 0
        if method == 'last':
            ref_obj_index = _last

        ref_obj = _objects[ref_obj_index]['obj']

        # as we will sort their coordinates at some point, we need to know
        # if the coordinates need to be reversed at
        # the end of the alignment process
        reversed = ref_obj.coordset[dim].reversed
        if reversed:
            ref_obj.sort(descend=False, dim=dim, inplace=True)

        # get the coordset corresponding to the reference object
        ref_obj_coordset = ref_obj.coordset

        # get the coordinate for the reference dimension
        ref_coord = ref_obj_coordset[dim]

        # as we will sort their coordinates at some point, we need to know
        # if the coordinates need to be reversed at
        # the end of the alignment process
        reversed = ref_coord.reversed

        # prepare a new Coord object to store the final new dimension
        new_coord = ref_coord.copy()

        ndec = get_n_decimals(new_coord.data.max(), 1.e-5)

        if new_coord.implements('LinearCoord'):
            new_coord = Coord(new_coord, linear=False, copy=True)

        # loop on all object
        for index, object in _objects.items():

            obj = object['obj']

            if obj is ref_obj:
                # not necessary to compare with itself!

            if reversed:
                obj.sort(descend=False, dim=dim, inplace=True)

            # get the current objet coordinates and check compatibility
            coord = obj.coordset[dim]
            if coord.implements('LinearCoord') or coord.linear:
                coord = Coord(coord, linear=False, copy=True)

            if not coord.is_units_compatible(ref_coord):
                # not compatible, stop everything
                raise UnitsCompatibilityError(
                    'NDataset to align must have compatible units!')

            # do units transform if necesssary so coords can be compared
            if coord.units != ref_coord.units:

            # adjust the new_cord depending on the method of alignement

            new_coord_data = set(np.around(new_coord.data, ndec))
            coord_data = set(np.around(coord.data, ndec))

            if method in ['outer', 'interpolate']:
                # in this case we do a union of the coords (masking the
                # missing values)
                # For method=`interpolate`, the interpolation will be
                # performed in a second step
                new_coord._data = sorted(coord_data | new_coord_data)

            elif method == 'inner':
                # take only intersection of the coordinates
                # and generate a warning if it result something null or
                new_coord._data = sorted(coord_data & new_coord_data)

            elif method in ['first', 'last']:
                # we take the reference coordinates already determined as
                # basis (masking the missing values)

                raise NotImplementedError(f'The method {method} is unknown!')

        # Now perform alignment of all objects on the new coordinates
        for index, object in _objects.items():

            obj = object['obj']

            # get the dim index for the given object
            dim_index = obj.dims.index(dim)

            # prepare slicing keys ; set slice(None) for the untouched
            # dimensions preceeding the dimension of interest
            prepend_keys = [slice(None)] * dim_index

            # New objects for obj must be created with the new coordinates

            # change the data shape
            new_obj_shape = list(obj.shape)
            new_obj_shape[dim_index] = len(new_coord)
            new_obj_data = np.full(new_obj_shape, np.NaN)

            # create new dataset for obj and ref_objects
            if copy:
                new_obj = obj.copy()
                new_obj = obj

            # update the data and mask
            coord = obj.coordset[dim]
            coord_data = set(np.around(coord.data, ndec))

            dim_loc = new_coord._loc2index(sorted(coord_data))
            loc = tuple(prepend_keys + [dim_loc])

            new_obj._data = new_obj_data

            # mask all the data then unmask later the relevant data in
            # the next step

            if not new_obj.is_masked:
                new_obj.mask = MASKED
                new_obj.mask[loc] = False
                mask = new_obj.mask.copy()
                new_obj.mask = MASKED
                new_obj.mask[loc] = mask

            # set the data for the loc
            new_obj._data[loc] = obj.data

            # update the coordinates
            new_coordset = obj.coordset.copy()
            if coord.is_labeled:
                label_shape = list(coord.labels.shape)
                label_shape[0] = new_coord.size
                new_coord._labels = np.zeros(tuple(label_shape)).astype(
                new_coord._labels[:] = '--'
                new_coord._labels[dim_loc] = coord.labels
            setattr(new_coordset, dim, new_coord)
            new_obj._coordset = new_coordset

            # reversed?
            if reversed:
                # we must reverse the given coordinates
                new_obj.sort(descend=reversed, dim=dim, inplace=True)

            # update the _objects
            _objects[index]['obj'] = new_obj

            if method == 'interpolate':
                    'Interpolation not yet implemented - for now equivalent '
                    'to `outer`')

        # the new transformed object must be in the same order as the passed
        # objects
        # and the missing values must be masked (for the moment they are defined to NaN

        for index, object in _objects.items():
            obj = object['obj']
            # obj[np.where(np.isnan(obj))] = MASKED  # mask NaN values
                obj))] = 99999999999999.  # replace NaN values (to simplify
            # comparisons)
            idx = int(object['idx'])
            objects[idx] = obj

            # we also transform into linear coord if possible ?
            pass  # TODO:

    # Now return

    return tuple(objects)
Пример #30
def concatenate(*datasets, **kwargs):
    """
    Concatenation of |NDDataset| objects along a given axis.

    Any number of |NDDataset| objects can be concatenated (by default
    the last on the last dimension). For this operation
    to be defined the following must be true :

        #. all inputs must be valid |NDDataset| objects;
        #. units of data must be compatible
        #. concatenation is along the axis specified or the last one;
        #. along the non-concatenated dimensions, shapes must match.

    Parameters
    ----------
    *datasets : positional |NDDataset| arguments
        The dataset(s) to be concatenated to the current dataset. The datasets
        must have the same shape, except in the dimension corresponding to axis
        (the last, by default).
    **kwargs
        Optional keyword parameters (see Other Parameters).

    Returns
    -------
    out
        A |NDDataset| created from the concatenation of the |NDDataset| input
        objects.

    Other Parameters
    ----------------
    dims : str, optional, default='x'
        The dimension along which the operation is applied.

    axis : int, optional
        The axis along which the operation is applied.

    Raises
    ------
    DimensionsCompatibilityError
        If the shapes differ along a dimension other than the concatenation
        one.
    UnitsCompatibilityError
        If two datasets have units of different dimensionality.

    See Also
    --------
    stack : Stack of |NDDataset| objects along a new dimension.

    Examples
    --------
    >>> A = scp.read('irdata/nh4y-activation.spg', protocol='omnic')
    >>> B = scp.read('irdata/nh4y-activation.scp')
    >>> C = scp.concatenate(A[10:], B[3:5], A[:10], axis=0)
    >>> A[10:].shape, B[3:5].shape, A[:10].shape, C.shape
    ((45, 5549), (2, 5549), (10, 5549), (57, 5549))


    >>> D = A.concatenate(B, B, axis=0)
    >>> A.shape, B.shape, D.shape
    ((55, 5549), (55, 5549), (165, 5549))

    >>> E = A.concatenate(B, axis=1)
    >>> A.shape, B.shape, E.shape
    ((55, 5549), (55, 5549), (55, 11098))
    """

    # deprecated usage: stacking is delegated to stack()
    if "force_stack" in kwargs:
        warn("force_stack not used anymore, use stack() instead",
             DeprecationWarning)
        return stack(datasets)

    # get a copy of input datasets in order that input data are not modified
    datasets = _get_copy(datasets)

    # get axis from arguments
    axis, dim = datasets[0].get_axis(**kwargs)

    # check shapes, except for dim along which concatenation will be done
    shapes = {ds.shape[:axis] + ds.shape[axis + 1:] for ds in datasets}
    if len(shapes) != 1:
        raise DimensionsCompatibilityError(
            "all input arrays must have the same shape")

    # check units
    units = tuple(set(ds.units for ds in datasets))
    if len(units) == 1:
        units = datasets[0].units
    else:
        # several distinct units: every pair must share its dimensionality
        for i, u1 in enumerate(units[:-1]):
            for u2 in units[i + 1:]:
                if u1.dimensionality != u2.dimensionality:
                    raise UnitsCompatibilityError(
                        f"Units of the data are {[str(u) for u in units]}. The datasets can't be concatenated"
                    )
        # should be compatible, so convert everything to the units of the
        # first dataset
        units = datasets[0].units
        for ds in datasets[1:]:
            if ds.units != units:
                # NOTE(review): assumes NDDataset.ito converts units in
                # place -- confirm against the NDDataset API
                ds.ito(units)

    # concatenate the data array + mask
    # --------------------------------------------

    sconcat = np.ma.concatenate([ds.masked_data for ds in datasets],
                                axis=axis)

    data = np.asarray(sconcat)
    mask = sconcat.mask

    # now manage coordinates and labels
    coords = datasets[0].coordset

    if coords is not None and not coords[dim].is_empty:

        labels = []
        if coords[dim].is_labeled:
            labels = [ds.coordset[dim].labels for ds in datasets]

        if coords[dim].implements() in ["Coord", "LinearCoord"]:
            # a concatenated coordinate is in general no longer linear
            coords[dim] = Coord(coords[dim], linear=False)
            if labels:
                coords[dim]._labels = np.concatenate(labels)
        elif coords[dim].implements("CoordSet"):
            if labels:
                labels = np.array(labels)
                for i, coord in enumerate(coords[dim]):
                    # NOTE(review): `labels[:i]` is empty for i == 0, so the
                    # first coordinate never gets labels -- kept as in the
                    # original, confirm this is intended
                    if labels[:i].size != 0:
                        coord._labels = np.concatenate(
                            [label for label in labels[:, i]])

        coords[dim]._data = np.concatenate(
            tuple((ds.coordset[dim].data for ds in datasets)))

    # the last dataset serves as template for the output (this makes explicit
    # what previously relied on a leaked loop variable)
    out = datasets[-1].copy()
    out._data = data
    if coords is not None:
        out._coordset[dim] = coords[dim]
    out._mask = mask
    out._units = units

    out.description = f"Concatenation of {len(datasets)}  datasets:\n"
    out.description += "( {}".format(datasets[0].name)
    out.title = datasets[0].title
    authortuple = (datasets[0].author, )

    for dataset in datasets[1:]:

        if out.title != dataset.title:
            # NOTE(review): reported through `warn`, the only reporting
            # function visible in this function -- confirm the intended logger
            warn(
                "Different data title => the title is that of the 1st dataset")

        if dataset.author not in authortuple:
            authortuple = authortuple + (dataset.author, )

        out.author = " & ".join([str(author) for author in authortuple])

        out.description += ", {}".format(dataset.name)

    out.description += " )"
    out._date = out._modified = datetime.datetime.now(datetime.timezone.utc)
    out._history = [str(out.date) + ": Created by concatenate"]

    return out