Example #1
    def _get_n_pc(self, n_pc=None):

        max_n_pc = self.ev.size
        if n_pc is None:
            n_pc = max_n_pc
            return n_pc
        elif isinstance(n_pc, int):
            n_pc = min(n_pc, max_n_pc)
            return n_pc
        elif n_pc == 'auto':
            M, N = self.X.shape
            if M >= N:
                n_pc = self._infer_pc_()
                return n_pc
            else:
                info_('Cannot use `auto` if n_observations < '
                      'n_features. Try with threshold 0.9999')
                n_pc = 0.9999

        if 0 < n_pc < 1.0:
            # smallest number of PCs whose cumulated explained variance
            # reaches the given ratio
            n_pc = np.searchsorted(self.ev_cum.data / 100., n_pc) + 1
            return n_pc
        else:
            raise ValueError('could not get a valid number of components')
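
A minimal, self-contained sketch of the threshold branch above: given the cumulative explained variance in percent (as in `self.ev_cum`), `np.searchsorted` returns the smallest number of components reaching the requested ratio. The array values below are illustrative, not taken from real data.

import numpy as np

# hypothetical cumulative explained variance, in percent (stands in for self.ev_cum)
ev_cum = np.array([62.0, 85.0, 93.0, 97.5, 99.2, 99.9, 100.0])

# number of PCs whose cumulated explained variance first reaches 95 %
n_pc = np.searchsorted(ev_cum / 100.0, 0.95) + 1
print(n_pc)  # -> 4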
Example #2
        def nlssubprob(V, W, Hinit, tol, maxiter):
            """
            H, grad : output solution and gradient
            iter : #iterations used
            V, W : constant matrices
            Hinit : initial solution
            tol : stopping tolerance
            maxiter : limit of iterations
            """

            H = Hinit
            WtV = np.dot(W.T, V)
            WtW = np.dot(W.T, W)

            alpha = 1
            beta = 0.1

            for n_iter in range(1, maxiter + 1):
                grad = np.dot(WtW, H) - WtV
                # stop when the projected gradient norm is small enough
                if norm(grad * np.logical_or(grad < 0, H > 0)) < tol:
                    break

                Hp = H

                # search the step size
                for inner_iter in range(20):
                    # gradient step
                    Hn = H - alpha * grad
                    # projection onto the non-negative orthant
                    Hn *= Hn > 0
                    d = Hn - H
                    gradd = np.dot(grad.ravel(), d.ravel())
                    dQd = np.dot(np.dot(WtW, d).ravel(), d.ravel())
                    # sufficient decrease condition
                    suff_decr = 0.99 * gradd + 0.5 * dQd < 0
                    if inner_iter == 0:
                        decr_alpha = not suff_decr
                        Hp = H
                    if decr_alpha:
                        if suff_decr:
                            H = Hn
                            break
                        else:
                            alpha = alpha * beta
                    else:
                        if not suff_decr or (Hp == Hn).all():
                            H = Hp
                            break
                        else:
                            alpha = alpha / beta
                            Hp = Hn

            if n_iter == maxiter:
                info_('Max iter in nlssubprob')

            return H, grad, n_iter
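
A hedged usage sketch of `nlssubprob` on synthetic matrices, assuming the function is reachable at top level and that, as in the surrounding module, `norm` is `numpy.linalg.norm` and `info_` is a logging helper:

import numpy as np
from numpy.linalg import norm

def info_(msg):  # stand-in for the module's logging helper
    print(msg)

rng = np.random.default_rng(0)
V = rng.random((20, 15))     # data matrix to factor
W = rng.random((20, 4))      # fixed factor
Hinit = rng.random((4, 15))  # initial guess for H

# solve min_H ||V - W @ H||_F subject to H >= 0
H, grad, n_iter = nlssubprob(V, W, Hinit, tol=1e-3, maxiter=200)
print(n_iter, norm(V - np.dot(W, H)))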
Example #3
        def callback(*args, **kwargs):
            """
            callback log.info function
            """
            global niter, chi2, everyiter, ncalls
            niter += 1

            if niter % everyiter != 0:
                return

            if not self.silent:
                display.clear_output(wait=True)
                info_(("Iterations: %d, Calls: %d (chi2: %.5f)" %
                       (niter, ncalls, chi2)))
                sys.stdout.flush()
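
The counters above live in module-level globals; an equivalent pattern that avoids global state keeps them in a closure (a sketch, independent of the surrounding class):

def make_callback(everyiter=10):
    state = {"niter": 0}

    def callback(*args, **kwargs):
        state["niter"] += 1
        if state["niter"] % everyiter == 0:
            print("Iterations: %d" % state["niter"])

    return callback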
Example #4
def check_filename_to_save(dataset,
                           filename=None,
                           save_as=False,
                           confirm=True,
                           **kwargs):

    from spectrochempy import NO_DIALOG
    from spectrochempy.core import info_

    NODIAL = (NO_DIALOG
              or "DOC_BUILDING" in environ) and "KEEP_DIALOGS" not in environ

    if filename and pathclean(filename).parent.resolve() == Path.cwd():
        filename = Path.cwd() / filename

    if not filename or save_as or filename.exists():

        from spectrochempy.core import save_dialog

        # no filename provided
        open_diag = True
        caption = "Save as ..."
        if filename is None or (NODIAL and pathclean(filename).is_dir()):
            filename = dataset.name
            filename = filename + kwargs.get("suffix", ".scp")

        # existing filename provided
        elif filename.exists():
            if confirm:
                caption = "File exists. Confirm overwrite"
            else:
                info_(
                    f"A file {filename} was present and has been overwritten.")
                open_diag = False

        if not NODIAL and open_diag:

            filename = save_dialog(
                caption=kwargs.pop("caption", caption),
                filename=filename,
                filters=kwargs.pop("filetypes", ["All file types (*.*)"]),
                **kwargs,
            )
            if filename is None:
                # probably a cancel action in the save dialog.
                return

    return pathclean(filename)
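
A hedged sketch of typical use (`ds` stands for any existing NDDataset; the filename is illustrative):

# no filename, or an existing one with save_as=True, opens a "Save as ..." dialog
# (unless NO_DIALOG is set); an existing filename with confirm=True asks before
# overwriting. None is returned when the dialog is cancelled.
filename = check_filename_to_save(ds, "experiment.scp", confirm=True)
if filename is not None:
    print(f"saving to {filename}")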
Example #5
def test_ndmath_classmethod_implementation(nd2d, name):
    nd = nd2d.copy()
    try:
        getattr(NDDataset, name)
    except AttributeError:
        info_("\n{} is not yet implemented".format(name))
    try:
        getattr(np.ma, name)
        getattr(np, name)(nd)
    except AttributeError:
        info_("\n{} is not a np.ma method".format(name))
    except TypeError as e:
        if "required positional" in e.args[0]:
            pass
        else:
            raise  # re-raise unchanged, preserving the traceback
Example #6
    def _enabled_process(self, flag):
        if flag:
            self._io.children = [
                self._load_button,
                self._process_button,
                self._save_button,
            ]
            self._controls.children = [
                self._limits_control,
                self._method_control,
                self._interpolation_control,
                self._ranges_control,
            ]

        else:
            self._io.children = [self._load_button]
            self._controls.children = []
            with self._output:
                info_("No data have been defined.\n"
                      "Use the upload button to load data to be processed!.")
Example #7
def _read_spa(*args, **kwargs):
    dataset, filename = args
    content = kwargs.get("content", False)

    if content:
        fid = io.BytesIO(content)
    else:
        fid = open(filename, "rb")

    return_ifg = kwargs.get("return_ifg", None)

    # Read name:
    # The name starts at position hex 1e = decimal 30. Its max length
    # is 256 bytes. It is the original filename under which the spectrum was
    # saved: it won't match the actual filename if the file was later renamed
    # in the OS.
    spa_name = _readbtext(fid, 30, 256)

    # The acquisition date (GMT) is at hex 128 = decimal 296.
    # Seconds since 31/12/1899, 00:00
    fid.seek(296)
    timestamp = _fromfile(fid, dtype="uint32", count=1)
    acqdate = datetime(1899, 12, 31, 0, 0, tzinfo=timezone.utc) + timedelta(
        seconds=int(timestamp)
    )
    acquisitiondate = acqdate

    # Transform back to timestamp for storage in the Coord object
    # use datetime.fromtimestamp(d, timezone.utc)) to transform back to datetime object
    timestamp = acqdate.timestamp()

    # From hex 130 = decimal 304, the spectrum is described
    # by a block of lines starting with "key values",
    # for instance hex[02 6a 6b 69 1b 03 82] -> dec[02 106 107 105 27 03 130]
    # Each of these lines provides positions of data and metadata in the file:
    #
    #     key: hex 02, dec  02: position of spectral header (=> nx,
    #                                 firstx, lastx, nscans, nbkgscans)
    #     key: hex 03, dec  03: intensity position
    #     key: hex 04, dec  04: user text position (custom info, can be present
    #                           several times. The text length is five bytes later)
    #     key: hex 1B, dec  27: position of History text. The text length
    #                           is five bytes later
    #     key: hex 53, dec  83: probably not a position, present when 'Retrieved from library'
    #     key: hex 64, dec 100: ?
    #     key: hex 66  dec 102: sample interferogram
    #     key: hex 67  dec 103: background interferogram
    #     key: hex 69, dec 105: ?
    #     key: hex 6a, dec 106: ?
    #     key: hex 80, dec 128: ?
    #     key: hex 82, dec 130: position of 'Experiment Information'. The text length
    #                           is five bytes later. The block gives the experiment filename (at +10),
    #                           experiment title (+90), custom text (+254) and accessory name (+413)
    #     key: hex 92, dec 146: position of 'custom infos'. The text length
    #                           is five bytes later.
    #
    # The line preceding the block starts with '01' or '0A'.
    # The lines after the block generally start with '00', except in a few cases
    # where they start with '01'. In such cases, the '53' key is also present
    # (before the '1B').

    # scan "key values"
    pos = 304
    spa_comments = []  # several custom comments can be present
    while "continue":
        fid.seek(pos)
        key = _fromfile(fid, dtype="uint8", count=1)

        if key == 2:
            # read the position of the header
            fid.seek(pos + 2)
            pos_header = _fromfile(fid, dtype="uint32", count=1)
            info = _read_header(fid, pos_header)

        elif key == 3 and return_ifg is None:
            intensities = _getintensities(fid, pos)

        elif key == 4:
            fid.seek(pos + 2)
            comments_pos = _fromfile(fid, "uint32", 1)
            fid.seek(pos + 6)
            comments_len = _fromfile(fid, "uint32", 1)
            fid.seek(comments_pos)
            spa_comments.append(fid.read(comments_len).decode("latin-1", "replace"))

        elif key == 27:
            fid.seek(pos + 2)
            history_pos = _fromfile(fid, "uint32", 1)
            fid.seek(pos + 6)
            history_len = _fromfile(fid, "uint32", 1)
            spa_history = _readbtext(fid, history_pos, history_len)

        elif key == 102 and return_ifg == "sample":
            s_ifg_intensities = _getintensities(fid, pos)

        elif key == 103 and return_ifg == "background":
            b_ifg_intensities = _getintensities(fid, pos)

        elif key in (0, 1):
            break

        pos += 16

    fid.close()

    if (return_ifg == "sample" and "s_ifg_intensities" not in locals()) or (
        return_ifg == "background" and "b_ifg_intensities" not in locals()
    ):
        info_("No interferogram found, read_spa returns None")
        return None
    elif return_ifg == "sample":
        intensities = s_ifg_intensities
    elif return_ifg == "background":
        intensities = b_ifg_intensities

    # load intensities into the NDDataset
    dataset.data = np.array(intensities[np.newaxis], dtype="float32")

    if return_ifg == "background":
        title = "sample acquisition timestamp (GMT)"  # bckg acquisition date is not known for the moment...
    else:
        title = "acquisition timestamp (GMT)"  # no ambiguity here

    _y = Coord(
        [timestamp],
        title=title,
        units="s",
        labels=([acquisitiondate], [filename]),
    )

    # useful when a part of the spectrum/ifg has been blanked:
    dataset.mask = np.isnan(dataset.data)

    if return_ifg is None:
        default_description = f"# Omnic name: {spa_name}\n# Filename: {filename.name}"
        dataset.units = info["units"]
        dataset.title = info["title"]

        # now add coordinates
        nx = info["nx"]
        firstx = info["firstx"]
        lastx = info["lastx"]
        xunit = info["xunits"]
        xtitle = info["xtitle"]

        spacing = (lastx - firstx) / (nx - 1)

        _x = LinearCoord(
            offset=firstx, increment=spacing, size=nx, title=xtitle, units=xunit
        )

    else:  # interferogram
        if return_ifg == "sample":
            default_description = (
                f"# Omnic name: {spa_name} : sample IFG\n # Filename: {filename.name}"
            )
            spa_name += ": Sample IFG"
        else:
            default_description = f"# Omnic name: {spa_name} : background IFG\n # Filename: {filename.name}"
            spa_name += ": Background IFG"
        dataset.units = "V"
        dataset.title = "detector signal"
        _x = LinearCoord(
            offset=0,
            increment=1,
            size=len(intensities),
            title="data points",
            units=None,
        )

    dataset.set_coordset(y=_y, x=_x)
    dataset.name = spa_name  # to be consistent with omnic behaviour
    dataset.filename = str(filename)

    # Set origin, description, history, date
    # Omnic spa/spg files don't have a specific "origin" field stating the origin of the data

    dataset.description = kwargs.get("description", default_description) + "\n"
    if len(spa_comments) > 1:
        dataset.description += "# Comments from Omnic:\n"
        for comment in spa_comments:
            dataset.description += comment + "\n---------------------\n"

    dataset.history = str(datetime.now(timezone.utc)) + ": imported from spa file(s)"

    if "spa_history" in locals():
        if len("spa_history".strip(" ")) > 0:
            dataset.history = (
                "Data processing history from Omnic :\n------------------------------------\n"
                + spa_history
            )

    dataset._date = datetime.now(timezone.utc)

    dataset.meta.collection_length = info["collection_length"] / 100 * ur("s")
    dataset.meta.optical_velocity = info["optical_velocity"]
    dataset.meta.laser_frequency = info["reference_frequency"] * ur("cm^-1")

    if dataset.x.units is None and dataset.x.title == "data points":
        # interferogram
        dataset.meta.interferogram = True
        dataset.meta.td = list(dataset.shape)
        dataset.x._zpd = int(np.argmax(dataset)[-1])
        dataset.x.set_laser_frequency()
        dataset.x._use_time_axis = (
            False  # True to have time, else it will be optical path difference
        )

    return dataset
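
The date arithmetic above (seconds counted from the Omnic epoch, 31/12/1899 00:00 UTC, then converted to a POSIX timestamp for storage in the Coord) can be checked in isolation; the raw value below is illustrative:

from datetime import datetime, timedelta, timezone

omnic_seconds = 3_786_825_600  # illustrative uint32 read at offset 296
acqdate = datetime(1899, 12, 31, 0, 0, tzinfo=timezone.utc) + timedelta(
    seconds=omnic_seconds)
posix = acqdate.timestamp()
# round trip back to a datetime, as noted in the comment above
assert datetime.fromtimestamp(posix, timezone.utc) == acqdate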
Example #8
def _read_header(fid, pos):
    """
    read spectrum/ifg/series header

    Parameters
    ----------
    fid : BufferedReader
        The buffered binary stream.

    pos : int
        The position of the header (see Notes).

    Returns
    -------
        dict, int
        Dictionary and current position in file

    Notes
    -----
        So far, the header structure is as follows:
        - starts with b'\x01', b'\x02', b'\x03' ... maybe indicating the header "type"
        - nx (UInt32): 4 bytes behind
        - xunits (UInt8): 8 bytes behind. So far, we have the following correspondence:
            `\x01`: wavenumbers, cm-1
            `\x02`: datapoints (interferogram)
            `\x03`: wavelength, nm
            `\x04`: wavelength, um
            `\x20`: Raman shift, cm-1
        - data units (UInt8): 12 bytes behind. So far, we have the following correspondence:
            `\x11`: absorbance
            `\x10`: transmittance (%)
            `\x0B`: reflectance (%)
            `\x0C`: Kubelka_Munk
            `\x16`: Volts (interferogram)
            `\x1A`: photoacoustic
            `\x1F`: Raman intensity
        - first x value (float32), 16 bytes behind
        - last x value (float32), 20 bytes behind
        - ... unknown
        - scan points (UInt32), 28 bytes behind
        - zpd (UInt32),  32 bytes behind
        - number of scans (UInt32), 36 bytes behind
        - ... unknown
        - number of background scans (UInt32), 52 bytes behind
        - ... unknown
        - collection length in 1/100th of sec (UInt32), 68 bytes behind
        - ... unknown
        - reference frequency (float32), 80 bytes behind
        - ...
        - optical velocity (float32), 188 bytes behind
        - ...
        - spectrum history (text), 208 bytes behind

        For "rapid-scan" srs files:
        - series name (text), 938 bytes behind
        - collection length (float32), 1002 bytes behind
        - last y (float 32), 1006 bytes behind
        - first y (float 32), 1010 bytes behind
        - ny (UInt32), 1026 bytes behind
        ... y unit could be at pos+1030 with 01 = minutes ?
        - history (text), 1200 bytes behind (only the initial history;
           when reprocessed, the updated history is at the end of the file after the
           b`\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF` sequence)
    """

    out = {}
    # determine the type of file from its magic bytes
    fid.seek(0)
    magic = fid.read(18)
    if magic == b"Spectral Data File":
        filetype = "spa, spg"
    elif magic == b"Spectral Exte File":
        filetype = "srs"
    else:  # unknown file type
        filetype = None

    # nx
    fid.seek(pos + 4)
    out["nx"] = _fromfile(fid, "uint32", count=1)

    # xunits
    fid.seek(pos + 8)
    key = _fromfile(fid, dtype="uint8", count=1)
    if key == 1:
        out["xunits"] = "cm^-1"
        out["xtitle"] = "wavenumbers"
    elif key == 2:
        out["xunits"] = None
        out["xtitle"] = "data points"
    elif key == 3:  # pragma: no cover
        out["xunits"] = "nm"
        out["xtitle"] = "wavelengths"
    elif key == 4:  # pragma: no cover
        out["xunits"] = "um"
        out["xtitle"] = "wavelengths"
    elif key == 32:  # pragma: no cover
        out["xunits"] = "cm^-1"
        out["xtitle"] = "raman shift"
    else:  # pragma: no cover
        out["xunits"] = None
        out["xtitle"] = "xaxis"
        info_("The nature of x data is not recognized, xtitle is set to 'xaxis'")

    # data units
    fid.seek(pos + 12)
    key = _fromfile(fid, dtype="uint8", count=1)
    if key == 17:
        out["units"] = "absorbance"
        out["title"] = "absorbance"
    elif key == 16:  # pragma: no cover
        out["units"] = "percent"
        out["title"] = "transmittance"
    elif key == 11:  # pragma: no cover
        out["units"] = "percent"
        out["title"] = "reflectance"
    elif key == 12:  # pragma: no cover
        out["units"] = None
        out["title"] = "log(1/R)"
    elif key == 20:  # pragma: no cover
        out["units"] = "Kubelka_Munk"
        out["title"] = "Kubelka-Munk"
    elif key == 21:
        out["units"] = None
        out["title"] = "reflectance"
    elif key == 22:
        out["units"] = "V"
        out["title"] = "detector signal"
    elif key == 26:  # pragma: no cover
        out["units"] = None
        out["title"] = "photoacoustic"
    elif key == 31:  # pragma: no cover
        out["units"] = None
        out["title"] = "Raman intensity"
    else:  # pragma: no cover
        out["units"] = None
        out["title"] = "intensity"
        info_("The nature of data is not recognized, title set to 'Intensity'")

    # firstx, lastx
    fid.seek(pos + 16)
    out["firstx"] = _fromfile(fid, "float32", 1)
    fid.seek(pos + 20)
    out["lastx"] = _fromfile(fid, "float32", 1)
    fid.seek(pos + 28)

    out["scan_pts"] = _fromfile(fid, "uint32", 1)
    fid.seek(pos + 32)
    out["zpd"] = _fromfile(fid, "uint32", 1)
    fid.seek(pos + 36)
    out["nscan"] = _fromfile(fid, "uint32", 1)
    fid.seek(pos + 52)
    out["nbkgscan"] = _fromfile(fid, "uint32", 1)
    fid.seek(pos + 68)
    out["collection_length"] = _fromfile(fid, "uint32", 1)
    fid.seek(pos + 80)
    out["reference_frequency"] = _fromfile(fid, "float32", 1)
    fid.seek(pos + 188)
    out["optical_velocity"] = _fromfile(fid, "float32", 1)

    if filetype == "spa, spg":
        out["history"] = _readbtext(fid, pos + 208, None)

    if filetype == "srs":
        if out["nbkgscan"] == 0:
            # an interferogram in rapid scan mode
            if out["firstx"] > out["lastx"]:
                out["firstx"], out["lastx"] = out["lastx"], out["firstx"]

        out["name"] = _readbtext(fid, pos + 938, 256)
        fid.seek(pos + 1002)
        out["collection_length"] = _fromfile(fid, "float32", 1) * 60
        fid.seek(pos + 1006)
        out["lasty"] = _fromfile(fid, "float32", 1)
        fid.seek(pos + 1010)
        out["firsty"] = _fromfile(fid, "float32", 1)
        fid.seek(pos + 1026)
        out["ny"] = _fromfile(fid, "uint32", 1)
        #  y unit could be at pos+1030 with 01 = minutes ?
        out["history"] = _readbtext(fid, pos + 1200, None)

        if _readbtext(fid, pos + 208, 256)[:10] == "Background":
            # it is the header of a background
            out["background_name"] = _readbtext(fid, pos + 208, 256)[10:]

    return out
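
All fields above are fixed-offset little-endian scalars; `_fromfile` presumably wraps a pattern like the stdlib sketch below (an assumption, not the actual implementation):

import io
import struct

def read_uint32_at(fid, pos):
    # seek to the absolute offset and unpack one little-endian uint32
    fid.seek(pos)
    return struct.unpack("<I", fid.read(4))[0]

# demo on an in-memory buffer: nx stored 4 bytes after the header position
buf = io.BytesIO(b"\x00" * 4 + struct.pack("<I", 5734))
print(read_uint32_at(buf, 4))  # -> 5734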
Example #9
    def __init__(self, dataset, guess, **kwargs):

        # list all default arguments:

        tol = kwargs.get("tol", 0.1)
        maxit = kwargs.get("maxit", 50)
        maxdiv = kwargs.get("maxdiv", 5)

        nonnegConc = kwargs.get("nonnegConc", "all")

        unimodConc = kwargs.get("unimodConc", "all")
        unimodConcTol = kwargs.get("unimodConcTol", 1.1)
        unimodConcMod = kwargs.get("unimodConcMod", "strict")
        if "unimodTol" in kwargs.keys():
            warnings.warn("unimodTol deprecated, use unimodConcTol instead",
                          DeprecationWarning)
            unimodConcTol = kwargs.get("unimodTol", 1.1)
        if "unimodMod" in kwargs.keys():
            warnings.warn("unimodMod deprecated, use unimodConcMod instead",
                          DeprecationWarning)
            unimodConcMod = kwargs.get("unimodConcMod", "strict")

        monoDecConc = kwargs.get("monoDecConc", None)
        monoIncTol = kwargs.get("monoIncTol", 1.1)
        monoIncConc = kwargs.get("monoIncConc", None)
        monoDecTol = kwargs.get("monoDecTol", 1.1)

        closureConc = kwargs.get("closureConc", None)
        closureTarget = kwargs.get("closureTarget", "default")
        closureMethod = kwargs.get("closureMethod", "scaling")

        hardConc = kwargs.get("hardConc", None)
        getConc = kwargs.get("getConc", None)
        argsGetConc = kwargs.get("argsGetConc", None)
        hardC_to_C_idx = kwargs.get("hardC_to_C_idx", "default")

        unimodSpec = kwargs.get("unimodSpec", None)
        unimodSpecTol = kwargs.get("unimodSpecTol", 1.1)
        unimodSpecMod = kwargs.get("unimodSpecMod", "strict")

        nonnegSpec = kwargs.get("nonnegSpec", "all")

        normSpec = kwargs.get("normSpec", None)

        if "verbose" in kwargs.keys():
            warnings.warn(
                "verbose deprecated. Instead, use set_loglevel(INFO) before launching MCRALS",
                DeprecationWarning,
            )
            set_loglevel(INFO)

        # Check initial data
        # ------------------------------------------------------------------------

        initConc, initSpec = False, False

        if isinstance(guess, np.ndarray):
            guess = NDDataset(guess)

        X = dataset

        if X.shape[0] == guess.shape[0]:
            initConc = True
            C = guess.copy()
            C.name = "Pure conc. profile, mcs-als of " + X.name
            nspecies = C.shape[1]

        elif X.shape[1] == guess.shape[1]:
            initSpec = True
            St = guess.copy()
            St.name = "Pure spectra profile, mcs-als of " + X.name
            nspecies = St.shape[0]

        else:
            raise ValueError("the dimensions of guess do not match the data")

        ny, _ = X.shape

        # makes a PCA with same number of species for further comparison
        Xpca = PCA(X).reconstruct(n_pc=nspecies)

        # reset default text to indexes
        # ------------------------------

        if nonnegConc == "all":
            nonnegConc = np.arange(nspecies)
        elif nonnegConc is None:
            nonnegConc = []
        elif nonnegConc != [] and (len(nonnegConc) > nspecies
                                   or max(nonnegConc) + 1 > nspecies):
            raise ValueError(
                f"The guess has only {nspecies} species, please check nonnegConc"
            )

        if unimodConc == "all":
            unimodConc = np.arange(nspecies)
        elif unimodConc is None:
            unimodConc = []
        elif unimodConc != [] and (len(unimodConc) > nspecies
                                   or max(unimodConc) + 1 > nspecies):
            raise ValueError(
                f"The guess has only {nspecies} species, please check unimodConc"
            )

        if closureTarget == "default":
            closureTarget = np.ones(ny)
        elif len(closureTarget) != ny:
            raise ValueError(
                f"The data contain only {ny} observations, please check closureTarget"
            )

        if hardC_to_C_idx == "default":
            hardC_to_C_idx = np.arange(nspecies)
        elif len(hardC_to_C_idx
                 ) > nspecies or max(hardC_to_C_idx) + 1 > nspecies:
            raise ValueError(
                f"The guess has only {nspecies} species, please check hardC_to_C_idx"
            )

        # constraints on spectra

        if unimodSpec == "all":
            unimodSpec = np.arange(nspecies)
        elif unimodSpec is None:
            unimodSpec = []
        elif unimodSpec != [] and (len(unimodSpec) > nspecies
                                   or max(unimodSpec) + 1 > nspecies):
            raise ValueError(
                f"The guess has only {nspecies} species, please check unimodSpec"
            )

        if nonnegSpec == "all":
            nonnegSpec = np.arange(nspecies)
        elif nonnegSpec is None:
            nonnegSpec = []
        elif nonnegSpec != [] and (len(nonnegSpec) > nspecies
                                   or max(nonnegSpec) + 1 > nspecies):
            raise ValueError(
                f"The guess has only {nspecies} species, please check nonnegSpec"
            )

        # Compute initial spectra or concentrations   (first iteration...)
        # ------------------------------------------------------------------------

        if initConc:
            if C.coordset is None:
                C.set_coordset(y=X.y, x=C.x)
            St = NDDataset(np.linalg.lstsq(C.data, X.data, rcond=None)[0])
            St.name = "Pure spectra profile, mcs-als of " + X.name
            St.title = X.title
            cy = C.x.copy() if C.x else None
            cx = X.x.copy() if X.x else None
            St.set_coordset(y=cy, x=cx)

        if initSpec:
            if St.coordset is None:
                St.set_coordset(y=St.y, x=X.x)
            Ct = np.linalg.lstsq(St.data.T, X.data.T, rcond=None)[0]
            C = NDDataset(Ct.T)
            C.name = "Pure conc. profile, mcs-als of " + X.name
            C.title = "concentration"
            cx = St.y.copy() if St.y else None
            cy = X.y.copy() if X.y else None
            C.set_coordset(y=cy, x=cx)

        change = tol + 1
        stdev = X.std()
        niter = 0
        ndiv = 0

        log = "*** ALS optimisation log***\n"
        log += "#iter     Error/PCA        Error/Exp      %change \n"
        log += "------------------------------------------------- \n"
        info_(log)

        while change >= tol and niter < maxit and ndiv < maxdiv:

            C.data = np.linalg.lstsq(St.data.T, X.data.T, rcond=None)[0].T
            niter += 1

            # Force non-negative concentration
            # --------------------------------
            if nonnegConc is not None:
                for s in nonnegConc:
                    C.data[:, s] = C.data[:, s].clip(min=0)

            # Force unimodal concentration
            # ----------------------------
            if unimodConc != []:
                C.data = _unimodal_2D(
                    C.data,
                    idxes=unimodConc,
                    axis=0,
                    tol=unimodConcTol,
                    mod=unimodConcMod,
                )

            # Force monotonic increase
            # ------------------------
            if monoIncConc is not None:
                for s in monoIncConc:
                    for curid in np.arange(ny - 1):
                        if C.data[curid + 1,
                                  s] < C.data[curid, s] / monoIncTol:
                            C.data[curid + 1, s] = C.data[curid, s]

            # Force monotonic decrease
            # ----------------------------------------------
            if monoDecConc is not None:
                for s in monoDecConc:
                    for curid in np.arange(ny - 1):
                        if C.data[curid + 1,
                                  s] > C.data[curid, s] * monoDecTol:
                            C.data[curid + 1, s] = C.data[curid, s]

            # Closure
            # ------------------------------------------
            if closureConc is not None:
                if closureMethod == "scaling":
                    Q = np.linalg.lstsq(C.data[:, closureConc],
                                        closureTarget.T,
                                        rcond=None)[0]
                    C.data[:, closureConc] = np.dot(C.data[:, closureConc],
                                                    np.diag(Q))
                elif closureMethod == "constantSum":
                    totalConc = np.sum(C.data[:, closureConc], axis=1)
                    C.data[:, closureConc] = (C.data[:, closureConc] *
                                              closureTarget[:, None] /
                                              totalConc[:, None])

            # external concentration profiles
            # ------------------------------------------
            if hardConc is not None:
                extOutput = getConc(*argsGetConc)
                if isinstance(extOutput, dict):
                    fixedC = extOutput["concentrations"]
                    argsGetConc = extOutput["new_args"]
                else:
                    fixedC = extOutput

                C.data[:, hardConc] = fixedC[:, hardC_to_C_idx]

            # stores C in C_hard
            Chard = C.copy()

            # compute St
            St.data = np.linalg.lstsq(C.data, X.data, rcond=None)[0]

            # stores St in Stsoft
            Stsoft = St.copy()

            # Force non-negative spectra
            # --------------------------
            if nonnegSpec is not None:
                St.data[nonnegSpec, :] = St.data[nonnegSpec, :].clip(min=0)

            # Force unimodal spectra
            # ----------------------------
            if unimodSpec != []:
                St.data = _unimodal_2D(
                    St.data,
                    idxes=unimodSpec,
                    axis=1,
                    tol=unimodSpecTol,
                    mod=unimodSpecMod,
                )

            # recompute C for consistency(soft modeling)
            C.data = np.linalg.lstsq(St.data.T, X.data.T, rcond=None)[0].T

            # rescale spectra & concentrations
            if normSpec == "max":
                alpha = np.max(St.data, axis=1).reshape(nspecies, 1)
                St.data = St.data / alpha
                C.data = C.data * alpha.T
            elif normSpec == "euclid":
                alpha = np.linalg.norm(St.data, axis=1).reshape(nspecies, 1)
                St.data = St.data / alpha
                C.data = C.data * alpha.T

            # compute residuals
            # -----------------
            X_hat = dot(C, St)
            stdev2 = (X_hat - X.data).std()
            change = 100 * (stdev2 - stdev) / stdev
            stdev = stdev2

            stdev_PCA = (X_hat - Xpca.data).std()

            logentry = "{:3d}      {:10f}      {:10f}      {:10f}".format(
                niter, stdev_PCA, stdev2, change)
            log += logentry + "\n"
            info_(logentry)

            if change > 0:
                ndiv += 1
            else:
                ndiv = 0
                change = -change

            if change < tol:
                logentry = "converged !"
                log += logentry + "\n"
                info_(logentry)

            if ndiv == maxdiv:
                logline = (
                    f"Optimization not improved since {maxdiv} iterations... unconverged "
                    f"or 'tol' set too small ?\n")
                logline += "Stop ALS optimization"
                log += logline + "\n"
                info_(logline)

            if niter == maxit:
                logline = ("Convergence criterion ('tol') not reached after "
                           "{:d} iterations. ".format(maxit))
                logline += "Stop ALS optimization"
                log += logline + "\n"
                info_(logline)

        self._X = X
        self._params = {
            "tol": tol,
            "maxit": maxit,
            "maxdiv": maxdiv,
            "nonnegConc": nonnegConc,
            "unimodConc": unimodConc,
            "unimodConcTol": unimodConcTol,
            "unimodConcMod": unimodConcMod,
            "closureConc": closureConc,
            "closureTarget ": closureTarget,
            "closureMethod": closureMethod,
            "monoDecConc": monoDecConc,
            "monoDecTol": monoDecTol,
            "monoIncConc": monoIncConc,
            "monoIncTol": monoIncTol,
            "hardConc": hardConc,
            "getConc": getConc,
            "argsGetConc": argsGetConc,
            "hardC_to_C_idx": hardC_to_C_idx,
            "nonnegSpec": nonnegSpec,
            "unimodSpec": unimodConc,
            "unimodSpecTol": unimodSpecTol,
            "unimodSpecMod": unimodSpecMod,
            "normSpec": normSpec,
        }

        self._C = C
        if hardConc is not None:
            self._fixedC = fixedC
            self._extOutput = extOutput
        else:
            self._fixedC = None
            self._extOutput = None

        self._St = St
        self._log = log

        self._Stsoft = Stsoft
        self._Chard = Chard
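
A hedged sketch of how this constructor is typically driven (assuming the class is spectrochempy's MCRALS, as the deprecation message above suggests; `X` is an NDDataset of spectra and `guess` an initial profile with a matching dimension, as checked at the top of __init__):

# guess.shape[0] == X.shape[0] -> guess is taken as initial concentrations C
# guess.shape[1] == X.shape[1] -> guess is taken as initial spectra St
mcr = MCRALS(X, guess, tol=0.01, maxit=100,
             nonnegConc="all", nonnegSpec="all", normSpec="euclid")
# the optimized profiles are the attributes stored at the end of __init__
C, St = mcr._C, mcr._St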
Example #10
    def _interpret(self, script):
        """
        Interpreter of the script content
        """
        # init some flags
        modlabel = None
        common = False
        fixed = False
        reference = False

        # create a new FitParameters instance
        fp = FitParameters()

        # set the number of experiments
        fp.expnumber = len(self.datasets)
        info_("The number of experiment(s) is set to %d" % fp.expnumber)

        # start interpreting ------------------------------------------------------
        lines = script.split('\n')
        lc = 0

        for item in lines:
            lc += 1  # -------------- count the lines
            line = item.strip()
            if line == '' or line.startswith("#"):
                # this is a blank or comment line, go to next line
                continue
            # split around the colon
            s = line.split(':')
            if len(s) != 2:
                raise ValueError(
                    'Cannot interpret line %d: a colon is missing?' % lc)

            key, values = s
            key = key.strip().lower()
            if key.startswith('model'):
                modlabel = values.lower().strip()
                if modlabel not in fp.models:
                    fp.models.append(modlabel)
                common = False
                continue
            elif key.startswith('common') or key.startswith('vars'):
                common = True
                modlabel = 'common'
                continue
            elif key.startswith('shape'):
                shape = values.lower().strip()
                if not shape:  # or (shape not in self._list_of_models and shape not in self._list_of_baselines):
                    raise ValueError(
                        'Shape of this model "%s" was not specified or is not implemented'
                        % shape)
                fp.model[modlabel] = shape
                common = False
                continue
            elif key.startswith("experiment"):  # must be in common
                if not common:
                    raise ValueError(
                        "'experiment_...' specification was found outside the common block."
                    )
                if "variables" in key:
                    expvars = values.lower().strip()
                    expvars = expvars.replace(',', ' ').replace(';', ' ')
                    expvars = expvars.split()
                    fp.expvars.extend(expvars)
                continue
            else:
                if modlabel is None and not common:
                    raise ValueError(
                        "The first definition should be a label for a model or a block of variables or constants."
                    )
                # get the parameters
                if key.startswith('*'):
                    fixed = True
                    reference = False
                    key = key[1:].strip()
                elif key.startswith('$'):
                    fixed = False
                    reference = False
                    key = key[1:].strip()
                elif key.startswith('>'):
                    fixed = True
                    reference = True
                    key = key[1:].strip()
                else:
                    raise ValueError(
                        "Cannot interpret line %d: a parameter definition must start with '*', '$' or '>'"
                        % lc)

                # store this parameter
                s = values.split(',')
                s = [ss.strip() for ss in s]
                if len(s) > 1 and ('[' in s[0]) and (']' in s[1]):  # list
                    s[0] = "%s, %s" % (s[0], s[1])
                    if len(s) > 2:
                        s[1:] = s[2:]
                if len(s) > 3:
                    raise ValueError(
                        'line %d: value, min, max should be defined in this order'
                        % lc)
                elif len(s) == 2:
                    raise ValueError('only two items in line %d' % lc)
                    # s.append('none')
                elif len(s) == 1:
                    s.extend(['none', 'none'])
                value, mini, maxi = s
                if mini.strip().lower() in ['none', '']:
                    mini = str(-1. / sys.float_info.epsilon)
                if maxi.strip().lower() in ['none', '']:
                    maxi = str(+1. / sys.float_info.epsilon)
                if modlabel != 'common':
                    ks = "%s_%s" % (key, modlabel)
                    # print(ks)
                    # if "ratio_line_1" in ks:
                    #    print('xxxx'+ks)
                    fp.common[key] = False
                else:
                    ks = "%s" % key
                    fp.common[key] = True
                fp.reference[ks] = reference
                if not reference:
                    val = value.strip()
                    val = eval(val)
                    if isinstance(val, list):
                        # if the parameter is already a list, that's ok if the number of parameters is ok
                        if len(val) != fp.expnumber:
                            raise ValueError(
                                'the number of parameters for %s (%d) does not match the number of experiments.'
                                % (key, len(val)))
                        if key not in fp.expvars:
                            raise ValueError(
                                'parameter %s is not declared as variable' %
                                key)
                    else:
                        if key in fp.expvars:
                            # expand to one parameter per experiment
                            val = [val] * fp.expnumber
                    fp[ks] = val, mini.strip(), maxi.strip(), fixed
                else:
                    fp[ks] = value.strip()

        return fp
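
The script grammar accepted by this interpreter, reconstructed from the branches above: `model:`/`shape:` declare a model, `common:` opens the shared block, and each parameter line starts with `$` (variable), `*` (fixed) or `>` (reference), followed by `value, min, max`. A hypothetical script (all names illustrative):

script = """
COMMON:
# $ = variable, * = fixed, > = reference
$ width: 1.0, 0.0, 5.0

MODEL: line_1
shape: gaussianmodel
$ position: 1000.0, 900.0, 1100.0
* amplitude: 1.0, none, none
"""
# fp = fit._interpret(script)  # -> a FitParameters instance, on a Fit object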
Example #11
def test_read_carroucell_with_dirname():
    A = NDDataset.read_carroucell(os.path.join('irdata', 'carroucell_samp'))
    for x in A:
        info_('  ' + x.name + ': ' + str(x.shape))
    assert len(A) == 11
    assert A[3].shape == (6, 11098)
Example #12
def read_carroucell(dataset=None, directory=None, **kwargs):
    """
    Open .spa files in a directory after a carroucell experiment.

    The files for a given sample are grouped in NDDatasets (sorted by acquisition date).
    The NDDatasets are returned in a list sorted by sample number.
    When the file containing the temperature data is present, the temperature is read
    and assigned as a label to each spectrum.

    Parameters
    ----------
    dataset : `NDDataset`
        The dataset to store the data and metadata.
        If None, an NDDataset is created.
    directory : str, optional
        If not specified, opens a dialog box.
    spectra : array-like of 2 int (min, max), optional, default=None
        The first and last spectrum to be loaded, as determined by their number.
        If None, all spectra are loaded.
    discardbg : bool, optional, default=True
        If True : do not load background (sample #9).
    delta_clocks : int, optional, default=0
        Difference in seconds between the clocks used for spectra and temperature acquisition.
        Defined as t(thermocouple clock) - t(spectrometer clock).

    Returns
    -------
    nddataset
        |NDDataset| or list of |NDDataset|.

    See Also
    --------
    read_topspin : Read TopSpin Bruker NMR spectra.
    read_omnic : Read Omnic spectra.
    read_opus : Read OPUS spectra.
    read_spg : Read Omnic *.spg grouped spectra.
    read_spa : Read Omnic *.Spa single spectra.
    read_srs : Read Omnic series.
    read_csv : Read CSV files.
    read_zip : Read Zip files.
    read_matlab : Read Matlab files.

    Notes
    -----
    All files are expected to be present in the same directory and their filenames
    are expected to be in the format: X_samplename_YYY.spa
    and for the background files: X_BCKG_YYYBG.spa
    where X is the sample holder number and YYY the spectrum number.

    Examples
    --------
    """

    # check if the first parameter is a dataset
    # because we allow not to pass it
    if not isinstance(dataset, NDDataset):
        # probably did not specify a dataset
        # so the first parameter must be the directory
        if isinstance(dataset, str) and dataset != '':
            directory = dataset

    directory = readdirname(directory)

    if not directory:
        # probably cancel has been chosen in the open dialog
        info_("No directory was selected.")
        return

    spectra = kwargs.get('spectra', None)
    discardbg = kwargs.get('discardbg', True)

    delta_clocks = datetime.timedelta(seconds=kwargs.get('delta_clocks', 0))

    datasets = []

    # get the sorted list of spa files in the directory
    spafiles = sorted([
        f for f in os.listdir(directory)
        if (os.path.isfile(os.path.join(directory, f))
            and f[-4:].lower() == '.spa')
    ])

    # discard BKG files
    if discardbg:
        spafiles = sorted([f for f in spafiles if 'BCKG' not in f])

    # select files
    if spectra is not None:
        smin, smax = spectra
        if discardbg:
            spafiles = sorted([
                f for f in spafiles
                if smin <= int(f.split('_')[2][:-4]) <= smax and 'BCKG' not in f
            ])
        else:
            spafilespec = sorted([
                f for f in spafiles
                if smin <= int(f.split('_')[2][:-4]) <= smax and 'BCKG' not in f
            ])
            spafileback = sorted([
                f for f in spafiles
                if smin <= int(f.split('_')[2][:-6]) <= smax and 'BCKG' in f
            ])
            spafiles = spafilespec + spafileback

    curfilelist = [spafiles[0]]
    curprefix = spafiles[0][::-1].split("_", 1)[1][::-1]

    for f in spafiles[1:]:
        if f[::-1].split("_", 1)[1][::-1] != curprefix:
            datasets.append(
                NDDataset.read_omnic(curfilelist,
                                     sortbydate=True,
                                     directory=directory))
            datasets[-1].name = os.path.basename(curprefix)
            curfilelist = [f]
            curprefix = f[::-1].split("_", 1)[1][::-1]
        else:
            curfilelist.append(f)

    datasets.append(
        NDDataset.read_omnic(curfilelist, sortbydate=True,
                             directory=directory))
    datasets[-1].name = os.path.basename(curprefix)

    # Now manage temperature
    Tfile = sorted(
        [f for f in os.listdir(directory) if f[-4:].lower() == '.xls'])
    if len(Tfile) == 0:
        print_("no temperature file")
    elif len(Tfile) > 1:
        warnings.warn(
            "several .xls/.csv files. The temperature will not be read")
    else:
        Tfile = Tfile[0]
        if Tfile[-4:].lower() == '.xls':
            book = xlrd.open_workbook(os.path.join(directory, Tfile))

            # determine experiment start and end time (thermocouple clock)
            ti = datasets[0].y.labels[0][0] + delta_clocks
            tf = datasets[-1].y.labels[-1][0] + delta_clocks

            # get thermocouple time and T information during the experiment
            t = []
            T = []
            sheet = book.sheet_by_index(0)
            for i in range(9, sheet.nrows):
                try:
                    time = datetime.datetime.strptime(
                        sheet.cell(i, 0).value, '%d/%m/%y %H:%M:%S').replace(
                            tzinfo=datetime.timezone.utc)
                    if ti <= time <= tf:
                        t.append(time)
                        T.append(sheet.cell(i, 4).value)
                except ValueError:
                    pass
                except TypeError:
                    pass

            # interpolate T = f(timestamp)
            tstamp = [time.timestamp() for time in t]
            # interpolate, except for the first and last points that are extrapolated
            interpolator = scipy.interpolate.interp1d(tstamp,
                                                      T,
                                                      fill_value='extrapolate',
                                                      assume_sorted=True)

            for ds in datasets:
                # timestamp of spectra for the thermocouple clock

                tstamp_ds = [(label[0] + delta_clocks).timestamp()
                             for label in ds.y.labels]
                T_ds = interpolator(tstamp_ds)
                newlabels = np.hstack((ds.y.labels, T_ds.reshape((T_ds.size, 1))))
                ds.y = Coord(title=ds.y.title,
                             data=ds.y.data,
                             labels=newlabels)

    if len(datasets) == 1:
        return datasets[0]  # a single dataset is returned

    # several datasets returned, sorted by sample #
    return sorted(datasets, key=lambda ds: int(re.split('-|_', ds.name)[0]))
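
The filename convention from the Notes (X_samplename_YYY.spa) drives both the grouping and the final sort; the same splits can be checked in isolation on a made-up name:

import re

fname = "3_mysample_007.spa"                 # X_samplename_YYY.spa
prefix = fname[::-1].split("_", 1)[1][::-1]  # everything before the last '_'
number = int(fname.split("_")[2][:-4])       # spectrum number YYY
holder = int(re.split("-|_", prefix)[0])     # sample holder number X
print(prefix, number, holder)                # -> 3_mysample 7 3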
Example #13
    def nmf(V, Winit, Hinit, tol, timelimit, maxiter):
        """
        (W,H) = nmf(V,Winit,Hinit,tol,timelimit,maxiter)
        W,H : output solution
        Winit,Hinit : initial solution
        tol : tolerance for a relative stopping condition
        timelimit, maxiter : limit of time and iterations
        """
        def nlssubprob(V, W, Hinit, tol, maxiter):
            """
            H, grad : output solution and gradient
            iter : #iterations used
            V, W : constant matrices
            Hinit : initial solution
            tol : stopping tolerance
            maxiter : limit of iterations
            """

            H = Hinit
            WtV = np.dot(W.T, V)
            WtW = np.dot(W.T, W)

            alpha = 1
            beta = 0.1

            for n_iter in range(1, maxiter + 1):
                grad = np.dot(WtW, H) - WtV
                # stop when the projected gradient norm is small enough
                if norm(grad * np.logical_or(grad < 0, H > 0)) < tol:
                    break

                Hp = H

                # search the step size
                for inner_iter in range(20):
                    # gradient step
                    Hn = H - alpha * grad
                    # projection onto the non-negative orthant
                    Hn *= Hn > 0
                    d = Hn - H
                    gradd = np.dot(grad.ravel(), d.ravel())
                    dQd = np.dot(np.dot(WtW, d).ravel(), d.ravel())
                    # sufficient decrease condition
                    suff_decr = 0.99 * gradd + 0.5 * dQd < 0
                    if inner_iter == 0:
                        decr_alpha = not suff_decr
                        Hp = H
                    if decr_alpha:
                        if suff_decr:
                            H = Hn
                            break
                        else:
                            alpha = alpha * beta
                    else:
                        if not suff_decr or (Hp == Hn).all():
                            H = Hp
                            break
                        else:
                            alpha = alpha / beta
                            Hp = Hn

            if n_iter == maxiter:
                info_('Max iter in nlssubprob')

            return H, grad, n_iter

        W = Winit

        H = Hinit

        initt = time()

        gradW = np.dot(W, np.dot(H, H.T)) - np.dot(V, H.T)
        gradH = np.dot(np.dot(W.T, W), H) - np.dot(W.T, V)
        initgrad = norm(np.r_[gradW, gradH.T])
        info_('Init gradient norm {:.3f}'.format(initgrad))
        tolW = max(0.001, tol) * initgrad
        tolH = tolW

        for myiter in range(1, maxiter):
            # stopping condition
            projnorm = norm(np.r_[gradW[np.logical_or(gradW < 0, W > 0)],
                                  gradH[np.logical_or(gradH < 0, H > 0)]])

            if projnorm < tol * initgrad or time() - initt > timelimit:
                break

            (W, gradW, iterW) = nlssubprob(V.T, H.T, W.T, tolW, 10000)
            W = W.T
            gradW = gradW.T

            if iterW == 1:
                tolW = 0.1 * tolW

            (H, gradH, iterH) = nlssubprob(V, W, H, tolH, 10000)

            if iterH == 1:
                tolH = 0.1 * tolH

            if myiter % 10 == 0:
                stdout.write('.')

        info_('\nIter = {} Final proj-grad norm {:.3f}'.format(
            myiter, projnorm))
        return W, H
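
A hedged check of the projected-gradient NMF above on a synthetic, exactly non-negative factorization (assuming, as elsewhere in this module, that `norm` is `numpy.linalg.norm`, `time` comes from the time module, `stdout` from sys, and `info_` is a logging helper):

import numpy as np
from numpy.linalg import norm

rng = np.random.default_rng(1)
Wtrue = rng.random((30, 3))
Htrue = rng.random((3, 20))
V = np.dot(Wtrue, Htrue)                 # exactly rank-3 and non-negative

W, H = nmf(V, rng.random((30, 3)), rng.random((3, 20)),
           tol=1e-4, timelimit=60, maxiter=200)
print(norm(V - np.dot(W, H)) / norm(V))  # a small relative residual is expected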
Example #14
    def nmf(self, V, Winit, Hinit, tol, maxtime, maxiter):
        """
        NMF by alternative non-negative least squares using projected gradients.

        Parameters
        ==========
        V: |ndarray|
            numpy array to be analysed
        Winit,Hinit: |ndarray|
            Initial solutions for the concentration and spectral profiles.
        tol: float
            Tolerance for a relative stopping condition.
        maxtime: float
            Limit of time.
        maxiter: int
            Limit number for iterations.

        Returns
        =======
        W,H: |ndarray|
            Output solution.
        """

        W = Winit
        H = Hinit

        initt = time()

        gradW = np.dot(W, np.dot(H, H.T)) - np.dot(V, H.T)
        gradH = np.dot(np.dot(W.T, W), H) - np.dot(W.T, V)
        initgrad = norm(np.r_[gradW, gradH.T])
        info_(f"Init gradient norm {initgrad:.3f}")
        tolW = max(0.001, tol) * initgrad
        tolH = tolW

        for myiter in range(1, maxiter):
            # stopping condition
            projnorm = norm(
                np.r_[
                    gradW[np.logical_or(gradW < 0, W > 0)],
                    gradH[np.logical_or(gradH < 0, H > 0)],
                ]
            )

            if projnorm < tol * initgrad or time() - initt > maxtime:
                break

            (W, gradW, iterW) = self.nlssubprob(V.T, H.T, W.T, tolW, 1000)
            W = W.T
            gradW = gradW.T

            if iterW == 1:
                tolW = 0.1 * tolW

            (H, gradH, iterH) = self.nlssubprob(V, W, H, tolH, 1000)

            if iterH == 1:
                tolH = 0.1 * tolH

            if myiter % 10 == 0:
                stdout.write(".")

        info_(f"\nIter = {myiter} Final proj-grad norm {projnorm:.3f}")
        return W, H
Example #15
def _read_carroucell(*args, **kwargs):

    _, directory = args
    directory = get_directory_name(directory)

    if not directory:  # pragma: no cover
        # probably cancel has been chosen in the open dialog
        info_("No directory was selected.")
        return

    spectra = kwargs.get("spectra", None)
    discardbg = kwargs.get("discardbg", True)
    delta_clocks = datetime.timedelta(seconds=kwargs.get("delta_clocks", 0))

    datasets = []

    # get the sorted list of spa files in the directory
    spafiles = sorted(get_filenames(directory, **kwargs)[".spa"])
    spafilespec = [f for f in spafiles if "BCKG" not in f.stem]
    spafileback = [f for f in spafiles if "BCKG" in f.stem]

    # select files
    prefix = lambda f: f.stem.split("_")[0]
    number = lambda f: int(f.stem.split("_")[1])
    if spectra is not None:
        smin, smax = spectra
        spafilespec = [f for f in spafilespec if smin <= number(f) <= smax]
        spafileback = [f for f in spafileback if smin <= number(f) <= smax]

    # discard the BCKG files unless requested otherwise
    spafiles = spafilespec
    if not discardbg:
        spafiles += spafileback

    # merge dataset with the same number
    curfilelist = [spafiles[0]]
    curprefix = prefix(spafiles[0])
    for f in spafiles[1:]:
        if prefix(f) != curprefix:
            ds = NDDataset.read_omnic(curfilelist,
                                      sortbydate=True,
                                      directory=directory,
                                      name=curprefix)
            datasets.append(ds)
            curfilelist = [f]
            curprefix = prefix(f)
        else:
            curfilelist.append(f)
    ds = NDDataset.read_omnic(curfilelist,
                              sortbydate=True,
                              directory=directory,
                              name=curprefix)
    datasets.append(ds)

    # Now manage temperature
    Tfile = sorted(
        [f for f in os.listdir(directory) if f[-4:].lower() == ".xls"])
    if len(Tfile) == 0:
        print_("no temperature file")
    elif len(Tfile) > 1:
        warnings.warn(
            "several .xls/.csv files. The temperature will not be read")
    else:
        Tfile = Tfile[0]
        if Tfile[-4:].lower() == ".xls":
            book = xlrd.open_workbook(os.path.join(directory, Tfile))

            # determine experiment start and end time (thermocouple clock)
            ti = datasets[0].y.labels[0][0] + delta_clocks
            tf = datasets[-1].y.labels[-1][0] + delta_clocks

            # get thermocouple time and T information during the experiment
            t = []
            T = []
            sheet = book.sheet_by_index(0)
            for i in range(9, sheet.nrows):
                try:
                    time = datetime.datetime.strptime(
                        sheet.cell(i, 0).value, "%d/%m/%y %H:%M:%S").replace(
                            tzinfo=datetime.timezone.utc)
                    if ti <= time <= tf:
                        t.append(time)
                        T.append(sheet.cell(i, 4).value)
                except ValueError:
                    pass
                except TypeError:
                    pass

            # interpolate T = f(timestamp)
            tstamp = [time.timestamp() for time in t]
            # interpolate, except for the first and last points that are extrapolated
            interpolator = scipy.interpolate.interp1d(tstamp,
                                                      T,
                                                      fill_value="extrapolate",
                                                      assume_sorted=True)

            for ds in datasets:
                # timestamp of spectra for the thermocouple clock

                tstamp_ds = [(label[0] + delta_clocks).timestamp()
                             for label in ds.y.labels]
                T_ds = interpolator(tstamp_ds)
                newlabels = np.hstack((ds.y.labels, T_ds.reshape((T_ds.size, 1))))
                ds.y = Coord(title=ds.y.title,
                             data=ds.y.data,
                             labels=newlabels)

    if len(datasets) == 1:
        return datasets[0]  # a single dataset is returned

    # several datasets returned, sorted by sample #
    return sorted(datasets, key=lambda ds: int(re.split("-|_", ds.name)[0]))
Example #16
    def __init__(self, dataset, **kwargs):

        super().__init__()

        # ------------------------------------------------------------------------
        # Utility functions
        # ------------------------------------------------------------------------
        def figures_of_merit(X, maxPIndex, C, St, j):
            # return %explained variance and stdev of residuals when the jth compound is added
            C[:, j] = X[:, maxPIndex[j]]
            St[0:j + 1, :] = np.linalg.lstsq(C.data[:, 0:j + 1],
                                             X.data,
                                             rcond=None)[0]
            Xhat = dot(C[:, 0:j + 1], St[0:j + 1, :])
            res = Xhat - X
            stdev_res = np.std(res)
            rsquare = 1 - np.linalg.norm(res)**2 / np.linalg.norm(X)**2
            return rsquare, stdev_res

        def str_iter_summary(j, index, coord, rsquare, stdev_res, diff):
            # return a formatted summary of the figures of merit at a given iteration

            string = "{:4}  {:5}  {:8.1f} {:10.4f} {:10.4f} {}".format(
                j + 1, index, coord, stdev_res, rsquare, diff)
            return string

        def get_x_data(X):
            if X.x is not None and not X.x.is_empty:  # TODO what about labels?
                return X.x.data
            else:
                return np.arange(X.shape[-1])

        # ------------------------------------------------------------------------
        # Check data
        # ------------------------------------------------------------------------

        X = dataset

        if len(X.shape) != 2:
            raise ValueError("For now, SIMPLISMA only handles 2D Datasets")

        if np.min(X.data) < 0:
            warnings.warn("SIMPLISMA does not handle easily negative values.")
            # TODO: check whether negative values should be set to zero or not.

        if "verbose" in kwargs.keys():
            warnings.warn(
                "verbose deprecated. Instead, use set_loglevel(INFO) before launching MCRALS",
                DeprecationWarning,
            )
            set_loglevel(INFO)

        interactive = kwargs.get("interactive", False)
        tol = kwargs.get("tol", 0.1)
        noise = kwargs.get("noise", 3)
        n_pc = kwargs.get("n_pc", 2)
        if not isinstance(n_pc, int) or n_pc < 2:
            raise ValueError(
                "Oh you did not just... 'MA' in simplisMA stands for Mixture Analysis. "
                "The number of pure compounds should be an integer greater than or equal to 2"
            )
        if interactive:
            n_pc = 100

        # ------------------------------------------------------------------------
        # Core
        # ------------------------------------------------------------------------

        if not interactive:
            logs = "*** Automatic SIMPL(I)SMA analysis *** \n"
        else:
            logs = "*** Interactive SIMPLISMA analysis *** \n"
        logs += "dataset: {}\n".format(X.name)
        logs += "  noise: {:2} %\n".format(noise)
        if not interactive:
            logs += "    tol: {:2} %\n".format(tol)
            logs += "   n_pc: {:2}\n".format(n_pc)
        logs += "\n"
        logs += "#iter index_pc  coord_pc   Std(res)   R^2   \n"
        logs += "---------------------------------------------"
        info_(logs)
        logs += "\n"

        # Containers for returned objects and intermediate data
        # ---------------------------------------------------
        # purity 'spectra' (generally spectra if X is passed,
        # but could also be concentrations if X.T is passed)
        Pt = NDDataset.zeros((n_pc, X.shape[-1]))
        Pt.name = "Purity spectra"
        Pt.set_coordset(y=Pt.y, x=X.x)
        Pt.y.title = "# pure compound"

        # weight matrix
        w = NDDataset.zeros((n_pc, X.shape[-1]))
        w.set_coordset(y=Pt.y, x=X.x)

        # Stdev spectrum
        s = NDDataset.zeros((n_pc, X.shape[-1]))
        s.name = "Standard deviation spectra"
        s.set_coordset(y=Pt.y, x=X.x)

        # maximum purity indexes and coordinates
        maxPIndex = [0] * n_pc
        maxPCoordinate = [0] * n_pc

        # Concentration matrix
        C = NDDataset.zeros((X.shape[-2], n_pc))
        C.name = "Relative Concentrations"
        C.set_coordset(y=X.y, x=C.x)
        C.x.title = "# pure compound"

        # Pure component spectral profiles
        St = NDDataset.zeros((n_pc, X.shape[-1]))
        St.name = "Pure compound spectra"
        St.set_coordset(y=Pt.y, x=X.x)

        # Compute Statistics
        # ------------------
        sigma = np.std(X.data, axis=0)
        mu = np.mean(X.data, axis=0)
        alpha = (noise / 100) * np.max(mu)
        lamda = np.sqrt(mu**2 + sigma**2)
        p = sigma / (mu + alpha)

        # scale dataset
        Xscaled = X.data / np.sqrt(mu**2 + (sigma + alpha)**2)

        # COO dispersion matrix
        COO = (1 / X.shape[-2]) * np.dot(Xscaled.T, Xscaled)

        # Determine the purest variables
        j = 0
        finished = False
        while not finished:
            # compute first purest variable and weights
            if j == 0:
                w[j, :] = lamda**2 / (mu**2 + (sigma + alpha)**2)
                s[j, :] = sigma * w[j, :]
                Pt[j, :] = p * w[j, :]

                # get index and coordinate of pure variable
                maxPIndex[j] = np.argmax(Pt[j, :].data)
                maxPCoordinate[j] = get_x_data(X)[maxPIndex[j]]

                # compute figures of merit
                rsquare0, stdev_res0 = figures_of_merit(X, maxPIndex, C, St, j)

                # add summary to log
                llog = str_iter_summary(j, maxPIndex[j], maxPCoordinate[j],
                                        rsquare0, stdev_res0, "")
                logs += llog + "\n"

                if interactive:
                    print(llog)

                    # should plot purity and stdev, does not work for the moment
                    # TODO: fix the code below
                    # fig1, (ax1, ax2) = plt.subplots(2,1)
                    # Pt[j, :].plot(ax=ax1)
                    # ax1.set_title('Purity spectrum #{}'.format(j+1))
                    # ax1.axvline(maxPCoordinate[j], color='r')
                    # s[j, :].plot(ax=ax2)
                    # ax2.set_title('standard deviation spectrum #{}'.format(j+1))
                    # ax2.axvline(maxPCoordinate[j], color='r')
                    # plt.show()

                    ans = ""
                    while ans.lower() not in ["a", "c"]:
                        ans = input("   |--> (a) Accept, (c) Change: ")

                    while ans.lower() != "a":
                        new = input(
                            "   |--> enter the new index (int) or variable value (float): "
                        )
                        try:
                            new = int(new)
                            maxPIndex[j] = new
                            maxPCoordinate[j] = get_x_data(X)[maxPIndex[j]]
                        except ValueError:
                            try:
                                new = float(new)
                                maxPIndex[j] = np.argmin(
                                    abs(get_x_data(X) - new))
                                maxPCoordinate[j] = get_x_data(X)[maxPIndex[j]]
                            except ValueError:
                                print(
                                    "Incorrect answer. Please enter a valid index or value"
                                )

                        rsquare0, stdev_res0 = figures_of_merit(
                            X, maxPIndex, C, St, j)

                        llog = str_iter_summary(j, maxPIndex[j],
                                                maxPCoordinate[j], rsquare0,
                                                stdev_res0, "")
                        logs += "   |--> changed pure variable #1"
                        logs += llog + "\n"
                        info_(llog)

                        ans = input("   |--> (a) Accept, (c) Change: ")
                    # ans was [a]ccept
                    j += 1
                if not interactive:
                    j += 1

                prev_stdev_res = stdev_res0

            else:
                # compute jth purest variable
                for i in range(X.shape[-1]):
                    Mji = np.zeros((j + 1, j + 1))
                    idx = [i] + maxPIndex[0:j]
                    for line in range(j + 1):
                        for col in range(j + 1):
                            Mji[line, col] = COO[idx[line], idx[col]]
                    w[j, i] = np.linalg.det(Mji)
                Pt[j, :] = p * w[j, :]
                s[j, :] = sigma * w[j, :]

                # get index and coordinate of jth pure variable
                maxPIndex[j] = np.argmax(Pt[j, :].data)
                maxPCoordinate[j] = get_x_data(X)[maxPIndex[j]]

                # compute figures of merit
                rsquarej, stdev_resj = figures_of_merit(X, maxPIndex, C, St, j)
                diff = 100 * (stdev_resj - prev_stdev_res) / prev_stdev_res
                prev_stdev_res = stdev_resj

                # add summary to log
                llog = str_iter_summary(j, maxPIndex[j], maxPCoordinate[j],
                                        rsquarej, stdev_resj, diff)
                logs += llog + "\n"

                if interactive:  # TODO: I suggest to use jupyter widgets for the interactivity!
                    info_(llog)

                    # should plot purity and stdev, does not work for the moment
                    # TODO: fix the code below
                    # ax1.clear()
                    # ax1.set_title('Purity spectrum #{}'.format(j+1))
                    # Pt[j, :].plot(ax=ax1)
                    # for coord in maxPCoordinate[:-1]:
                    #     ax1.axvline(coord, color='g')
                    # ax1.axvline(maxPCoordinate[j], color='r')
                    # ax2.clear()
                    # ax2.set_title('standard deviation spectrum #{}'.format(j+1))
                    # s[j, :].plot(ax=ax2)
                    # for coord in maxPCoordinate[:-1]:
                    #     ax2.axvline(coord, color='g')
                    # ax2.axvline(maxPCoordinate[j], color='r')
                    # plt.show()

                    ans = ""
                    while ans.lower() not in ["a", "c", "r", "f"]:
                        ans = input(
                            "   |--> (a) Accept and continue, (c) Change, (r) Reject, (f) Accept and finish: "
                        )

                    while ans.lower() == "c":
                        new = input(
                            "   |--> enter the new index (int) or variable value (float): "
                        )
                        try:
                            new = int(new)
                            maxPIndex[j] = new
                            maxPCoordinate[j] = get_x_data(X)[maxPIndex[j]]
                        except ValueError:
                            try:
                                new = float(new)
                                maxPIndex[j] = np.argmin(
                                    abs(get_x_data(X) - new))
                                maxPCoordinate[j] = get_x_data(X)[maxPIndex[j]]
                            except ValueError:
                                print(
                                    "   |--> Incorrect answer. Please enter a valid index or value"
                                )

                        rsquarej, stdev_resj = figures_of_merit(
                            X, maxPIndex, C, St, j)
                        diff = 100 * (stdev_resj -
                                      prev_stdev_res) / prev_stdev_res
                        prev_stdev_res = stdev_resj

                        logs += f"   |--> changed pure variable #{j + 1}\n"
                        llog = str_iter_summary(
                            j,
                            maxPIndex[j],
                            maxPCoordinate[j],
                            rsquarej,
                            stdev_resj,
                            diff,
                        )
                        logs += llog + "\n"
                        info_(llog)

                        info_(
                            f"purest variable #{j + 1} set at index = {maxPIndex[j]} ; x = {maxPCoordinate[j]}"
                        )
                        ans = input(
                            "   |--> (a) Accept and continue, (c) Change, (r) Reject, (f) Accept and finish: "
                        )

                    if ans.lower() == "r":
                        maxPCoordinate[j] = 0
                        maxPIndex[j] = 0
                        logs += f"   |--> rejected pure variable #{j + 1}\n"
                        j = j - 1

                    elif ans.lower() == "a":
                        j = j + 1

                    elif ans.lower() == "f":
                        finished = True
                        j = j + 1
                        llog = f"\n**** Interrupted by user at compound # {j} \n**** End of SIMPL(I)SMA analysis."
                        logs += llog + "\n"
                        Pt = Pt[0:j, :]
                        St = St[0:j, :]
                        s = s[0:j, :]
                        C = C[:, 0:j]
                # not interactive
                else:
                    j = j + 1
                    if (1 - rsquarej) < tol / 100:
                        llog = (
                            f"\n**** Unexplained variance lower than 'tol' ({tol}%) \n"
                            "**** End of SIMPL(I)SMA analysis.")
                        logs += llog + "\n"
                        Pt = Pt[0:j, :]
                        St = St[0:j, :]
                        s = s[0:j, :]
                        C = C[:, 0:j]

                        info_(llog)
                        finished = True
            if j == n_pc:
                llog = (
                    f"\n**** Reached maximum number of pure compounds 'n_pc' ({n_pc}) \n"
                    "**** End of SIMPL(I)SMA analysis.")
                logs += llog + "\n"
                info_(llog)
                finished = True

        Pt.description = "Purity spectra from SIMPLISMA:\n" + logs
        C.description = "Concentration/contribution matrix from SIMPLISMA:\n" + logs
        St.description = "Pure compound spectra matrix from SIMPLISMA:\n" + logs
        s.description = "Standard deviation spectra matrix from SIMPLISMA:\n" + logs

        self._logs = logs
        self._X = X
        self._Pt = Pt
        self._C = C
        self._St = St
        self._s = s
Example #17
    def __init__(self, dataset, guess, **kwargs):   # lgtm [py/missing-call-to-init]
        """
        Parameters
        ----------
        dataset : |NDDataset|
            The dataset on which to perform the MCR-ALS analysis
        guess : |NDDataset|
            Initial concentration or spectra
        verbose : bool
            If set to True, prints a summary of residuals and residuals change at each iteration. default = False.
            In any case, the same information is returned in self.logs
        **kwargs : dict
            Optimization parameters : See Other Parameters.

        Other Parameters
        ----------------
        tol : float, optional, default=0.1
            Convergence criterion on the change of residuals
            (percent change of standard deviation of residuals).
        maxit : int, optional, default=50
            Maximum number of ALS minimizations.
        maxdiv : int, optional, default=5.
            Maximum number of successive non-converging iterations.
        nonnegConc : list or tuple, Default=[0, 1, ...] (only non-negative concentrations)
            Index of species having non-negative concentration profiles. For instance [0, 2] indicates that species
            #0 and #2 have non-negative conc profiles while species #1 can have negative concentrations.
        unimodConc : list or tuple, Default=[0, 1, ...] (only unimodal concentration profiles)
            Index of species having unimodal concentration profiles.
        closureConc : list or tuple, Default=None  (no closure)
            Index of species subjected to a closure constraint.
        externalConc : list or tuple, Default=None (no external concentration)
            Index of species for which a concentration profile is provided by an external function.
        getExternalConc : callable
            An external function that will provide `n_ext` concentration profiles:

            getExternalConc(C, extConc, ext_to_C_idx, *args) -> extC

            or

            getExternalConc(C, extConc, ext_to_C_idx, *args) -> (extC, out2, out3, ...)

            where C is the current concentration matrix, *args are the parameters needed to completely
            specify the function, extC is an ndarray or NDDataset of shape (C.y, n_ext), and out2, out3, ... are
            supplementary outputs returned by the function (e.g. optimized rate parameters).
        args : tuple, optional.
            Extra arguments passed to the external function
        external_to_C_idx : array or tuple, Default=np.arange(n_ext)
            Indicates the correspondence between the indexes of external chemical
            profiles and the columns of the C matrix. [1, None, 0] indicates that the first external profile is the
            second pure species (index 1).
        nonnegSpec : list or tuple, Default=[0, 1, ...] (only non-negative spectra)
            Indicates species having non-negative spectra.
        unimodSpec : list or tuple, Default=None (no unimodal spectra)
            Indicates species having unimodal spectra.
        """

        verbose = kwargs.pop('verbose', False)
        if verbose:
            set_loglevel(INFO)

        # Check initial data
        # ------------------------------------------------------------------------

        initConc, initSpec = False, False

        if isinstance(guess, np.ndarray):
            guess = NDDataset(guess)

        X = dataset

        if X.shape[0] == guess.shape[0]:
            initConc = True
            C = guess.copy()
            C.name = 'Pure conc. profile, mcr-als of ' + X.name
            nspecies = C.shape[1]

        elif X.shape[1] == guess.shape[1]:
            initSpec = True
            St = guess.copy()
            St.name = 'Pure spectra profile, mcr-als of ' + X.name
            nspecies = St.shape[0]

        else:
            raise ValueError('the dimensions of initial concentration '
                             'or spectra dataset do not match the data')

        ny, nx = X.shape

        # makes a PCA with same number of species
        Xpca = PCA(X).reconstruct(n_pc=nspecies)

        # Get optional parameters in kwargs or set them to their default
        # ------------------------------------------------------------------------

        # TODO: make a preference  file to set this kwargs
        # optimization

        tol = kwargs.get('tol', 0.1)
        maxit = kwargs.get('maxit', 50)
        maxdiv = kwargs.get('maxdiv', 5)

        # constraints on concentrations
        nonnegConc = kwargs.get('nonnegConc', np.arange(nspecies))
        unimodConc = kwargs.get('unimodConc', np.arange(nspecies))
        unimodTol = kwargs.get('unimodTol', 1.1)
        unimodMod = kwargs.get('unimodMod', 'strict')
        closureConc = kwargs.get('closureConc', None)
        if closureConc is not None:
            closureTarget = kwargs.get('closureTarget', np.ones(ny))
            closureMethod = kwargs.get('closureMethod', 'scaling')
        monoDecConc = kwargs.get('monoDecConc', None)
        monoDecTol = kwargs.get('monoDecTol', 1.1)
        monoIncConc = kwargs.get('monoIncConc', None)
        monoIncTol = kwargs.get('monoIncTol', 1.1)
        externalConc = kwargs.get('externalConc', None)
        if externalConc is not None:
            getExternalConc = kwargs.get('getExternalConc', None)
            if getExternalConc is None:
                raise ValueError('A function must be given to get the external concentration profile(s)')
            external_to_C_idx = kwargs.get('external_to_C_idx', externalConc)
            args = kwargs.get('args', ())

        # constraints on spectra
        nonnegSpec = kwargs.get('nonnegSpec', np.arange(nspecies))
        normSpec = kwargs.get('normSpec', None)

        # TODO: add unimodal constraint on spectra

        # Compute initial spectra or concentrations   (first iteration...)
        # ------------------------------------------------------------------------

        if initConc:
            if C.coordset is None:
                C.set_coordset(y=X.y, x=C.x)
            St = NDDataset(np.linalg.lstsq(C.data, X.data, rcond=None)[0])
            St.name = 'Pure spectra profile, mcr-als of ' + X.name
            St.title = X.title
            cy = C.x.copy() if C.x else None
            cx = X.x.copy() if X.x else None
            St.set_coordset(y=cy, x=cx)

        if initSpec:
            if St.coordset is None:
                St.set_coordset(y=St.y, x=X.x)
            Ct = np.linalg.lstsq(St.data.T, X.data.T, rcond=None)[0]
            C = NDDataset(Ct.T)
            C.name = 'Pure conc. profile, mcr-als of ' + X.name
            C.title = 'concentration'
            cx = St.y.copy() if St.y else None
            cy = X.y.copy() if X.y else None
            C.set_coordset(y=cy, x=cx)

        change = tol + 1
        stdev = X.std()  # .data[0]
        niter = 0
        ndiv = 0

        logs = '*** ALS optimisation log***\n'
        logs += '#iter     Error/PCA        Error/Exp      %change\n'
        logs += '---------------------------------------------------'
        info_(logs)

        while change >= tol and niter < maxit and ndiv < maxdiv:

            C.data = np.linalg.lstsq(St.data.T, X.data.T, rcond=None)[0].T
            niter += 1

            # Force non-negative concentration
            # --------------------------------
            if nonnegConc is not None:
                for s in nonnegConc:
                    C.data[:, s] = C.data[:, s].clip(min=0)

            # Force unimodal concentration
            # ----------------------------
            if unimodConc is not None:
                for s in unimodConc:
                    maxid = np.argmax(C.data[:, s])
                    curmax = C.data[maxid, s]
                    curid = maxid

                    while curid > 0:
                        curid -= 1
                        if C.data[curid, s] > curmax * unimodTol:
                            if unimodMod == 'strict':
                                C.data[curid, s] = C.data[curid + 1, s]
                            if unimodMod == 'smooth':
                                C.data[curid, s] = (C.data[curid, s] + C.data[
                                    curid + 1, s]) / 2
                                C.data[curid + 1, s] = C.data[curid, s]
                                curid = curid + 2
                        curmax = C.data[curid, s]

                    curid = maxid
                    while curid < ny - 1:
                        curid += 1
                        if C.data[curid, s] > curmax * unimodTol:
                            if unimodMod == 'strict':
                                C.data[curid, s] = C.data[curid - 1, s]
                            if unimodMod == 'smooth':
                                C.data[curid, s] = (C.data[curid, s] + C.data[
                                    curid - 1, s]) / 2
                                C.data[curid - 1, s] = C.data[curid, s]
                                curid = curid - 2
                        curmax = C.data[curid, s]

            # Force monotonic increase
            # ------------------------
            if monoIncConc is not None:
                for s in monoIncConc:
                    for curid in np.arange(ny - 1):
                        if C.data[curid + 1, s] < C.data[curid, s] / monoIncTol:
                            C.data[curid + 1, s] = C.data[curid, s]

            # Force monotonic decrease
            # ----------------------------------------------
            if monoDecConc is not None:
                for s in monoDecConc:
                    for curid in np.arange(ny - 1):
                        if C.data[curid + 1, s] > C.data[curid, s] * monoDecTol:
                            C.data[curid + 1, s] = C.data[curid, s]

            # Closure
            # ------------------------------------------
            if closureConc is not None:
                if closureMethod == 'scaling':
                    Q = np.linalg.lstsq(C.data[:, closureConc], closureTarget.T, rcond=None)[0]
                    C.data[:, closureConc] = np.dot(C.data[:, closureConc], np.diag(Q))
                elif closureMethod == 'constantSum':
                    totalConc = np.sum(C.data[:, closureConc], axis=1)
                    C.data[:, closureConc] = C.data[:, closureConc] * closureTarget[:, None] / totalConc[:, None]

            # external concentration profiles
            # ------------------------------------------
            if externalConc is not None:
                extOutput = getExternalConc(C, externalConc, external_to_C_idx, *args)
                if isinstance(extOutput, dict):
                    extC = extOutput['concentrations']
                    args = extOutput['new_args']
                else:
                    extC = extOutput
                if isinstance(extC, NDDataset):
                    extC = extC.data
                C.data[:, externalConc] = extC[:, external_to_C_idx]

            # stores C in C_hard
            Chard = C.copy()

            # compute St
            St.data = np.linalg.lstsq(C.data, X.data, rcond=None)[0]

            # stores St in Stsoft
            Stsoft = St.copy()

            # Force non-negative spectra
            # --------------------------
            if nonnegSpec is not None:
                St.data[nonnegSpec, :] = St.data[nonnegSpec, :].clip(min=0)

            # recompute C for consistency(soft modeling)
            C.data = np.linalg.lstsq(St.data.T, X.data.T, rcond=None)[0].T


            # rescale spectra & concentrations
            if normSpec == 'max':
                alpha = np.max(St.data, axis=1).reshape(nspecies, 1)
                St.data = St.data / alpha
                C.data = C.data * alpha.T
            elif normSpec == 'euclid':
                alpha = np.linalg.norm(St.data, axis=1).reshape(nspecies, 1)
                St.data = St.data / alpha
                C.data = C.data * alpha.T

            # compute residuals
            # -----------------
            X_hat = dot(C, St)
            stdev2 = (X_hat - X.data).std()
            change = 100 * (stdev2 - stdev) / stdev
            stdev = stdev2

            stdev_PCA = (X_hat - Xpca.data).std()  # TODO: Check PCA : values are different from the Arnaud version ?

            logentry = '{:3d}      {:10f}      {:10f}      {:10f}'.format(niter, stdev_PCA, stdev2, change)
            logs += logentry + '\n'
            info_(logentry)


            if change > 0:
                ndiv += 1
            else:
                ndiv = 0
                change = -change

            if change < tol:
                logentry = 'converged !'
                logs += logentry + '\n'
                info_(logentry)

            if ndiv == maxdiv:
                logline = f"Optimization not improved since {maxdiv} iterations... unconverged " \
                          f"or 'tol' set too small ?\n"
                logline += 'Stop ALS optimization'
                logs += logline + '\n'
                info_(logline)

            if niter == maxit:
                logline = "Convergence criterion ('tol') not reached after {:d} iterations.\n".format(maxit)
                logline += 'Stop ALS optimization'
                logs += logline + '\n'
                info_(logline)

        self._X = X
        self._params = kwargs

        self._C = C
        if externalConc is not None:
            self._extC = extC
            self._extOutput = extOutput
        else:
            self._extC = None
            self._extOutput = None

        self._St = St
        self._logs = logs

        self._Stsoft = Stsoft
        self._Chard = Chard
Example #18
    def _interpret(self, script):
        """
        Interpreter of the script content.
        """
        # init some flags
        modlabel = None
        common = False
        fixed = False
        reference = False

        # create a new FitParameters instance
        fp = FitParameters()

        # set the number of experiments
        fp.expnumber = len(self.datasets)
        info_(f"The number of experiment(s) is set to {fp.expnumber}")

        # start interpreting ------------------------------------------------------
        lines = script.split("\n")
        lc = 0

        for item in lines:
            lc += 1  # -------------- count the lines
            line = item.strip()
            if line == "" or line.startswith("#"):
                # this is a blank or comment line, go to next line
                continue
            # split around the colon
            s = line.split(":")
            if len(s) != 2:
                raise ValueError(
                    f"Cannot interpret line {lc}: A colon is missing?")

            key, values = s
            key = key.strip().lower()
            if key.startswith("model"):
                modlabel = values.lower().strip()
                if modlabel not in fp.models:
                    fp.models.append(modlabel)
                common = False
                continue
            elif key.startswith("common") or key.startswith("vars"):
                common = True
                modlabel = "common"
                continue
            elif key.startswith("shape"):
                shape = values.lower().strip()
                if not shape:  # or (shape not in self._list_of_models and shape not in self._list_of_baselines)
                    raise ValueError(
                        f"Shape of this model `{shape}` was not specified or is not implemented"
                    )
                fp.model[modlabel] = shape
                common = False
                continue
            elif key.startswith("experiment"):  # must be in common
                if not common:
                    raise ValueError(
                        "'experiment_...' specification was found outside the common block."
                    )
                if "variables" in key:
                    expvars = values.lower().strip()
                    expvars = expvars.replace(",", " ").replace(";", " ")
                    expvars = expvars.split()
                    fp.expvars.extend(expvars)
                continue
            else:
                if modlabel is None and not common:
                    raise ValueError(
                        "The first definition should be a label for a model or a block of variables or constants."
                    )
                # get the parameters
                if key.startswith("*"):
                    fixed = True
                    reference = False
                    key = key[1:].strip()
                elif key.startswith("$"):
                    fixed = False
                    reference = False
                    key = key[1:].strip()
                elif key.startswith(">"):
                    fixed = True
                    reference = True
                    key = key[1:].strip()
                else:
                    raise ValueError(
                        f"Cannot interpret line {lc}: A parameter definition must start with *,$ or >"
                    )

                # store this parameter
                s = values.split(",")
                s = [ss.strip() for ss in s]
                if len(s) > 1 and ("[" in s[0]) and ("]" in s[1]):  # list
                    s[0] = "%s, %s" % (s[0], s[1])
                    if len(s) > 2:
                        s[1:] = s[2:]
                if len(s) > 3:
                    raise ValueError(
                        f"line {lc}: value, min, max should be defined in this order"
                    )
                elif len(s) == 2:
                    raise ValueError(
                        f"line {lc}: two items found; provide either one (value) or three (value, min, max)"
                    )
                elif len(s) == 1:
                    s.extend(["none", "none"])
                value, mini, maxi = s
                if mini.strip().lower() in ["none", ""]:
                    mini = str(-1.0 / sys.float_info.epsilon)
                if maxi.strip().lower() in ["none", ""]:
                    maxi = str(+1.0 / sys.float_info.epsilon)
                if modlabel != "common":
                    ks = f"{key}_{modlabel}"
                    fp.common[key] = False
                else:
                    ks = f"{key}"
                    fp.common[key] = True
                fp.reference[ks] = reference
                if not reference:
                    val = value.strip()
                    val = eval(val)
                    if isinstance(val, list):
                        # if the parameter is already a list, that's ok if the number of parameters is ok
                        if len(val) != fp.expnumber:
                            raise ValueError(
                                f"the number of parameters {len(val)} is not the number of experiments."
                            )
                        if key not in fp.expvars:
                            raise ValueError(
                                f"parameter {key} is not declared as variable")
                    else:
                        if key in fp.expvars:
                            # we create a list of parameters corresponding
                            val = [val] * fp.expnumber
                    fp[ks] = val, mini.strip(), maxi.strip(), fixed
                else:
                    fp[ks] = value.strip()

        return fp
Example #19
    def run(self,
            maxiter=100,
            maxfun=None,
            every=10,
            method='simplex',
            **kwargs):
        """ Main fitting procedure

        Parameters
        ----------
        maxiter : int, maximum number of iteration
        maxfun : int, maximum number of function calls
        every : int, number of function call between two displays
        method : str, ether 'simplex' or 'hopping'
        dryrun : bool
        """

        if not self.silent:
            level = preferences.log_level
            if level > INFO:
                preferences.log_level = INFO
            info_('*' * 50)
            info_('  Entering fitting procedure')
            info_('*' * 50)

        global niter, chi2, everyiter, ncalls
        ncalls = 0
        everyiter = every
        niter = 0

        # internally defined function chi2
        def funchi2(params, datasets, *constraints):
            """
            Return sum((y - x)**2)
            """
            global chi2, ncalls
            # model spectrum

            chi2 = 0
            som = 0
            ncalls += 1

            for exp_idx, dataset in enumerate(datasets):
                modeldata = self._get_modeldata(dataset, exp_idx)[0]
                # baseline is already summed with modeldata[-1]

                # important to work with the real component of dataset
                # not the complex number
                data = dataset.real.data.squeeze()

                # if not dataset.is_2d:
                mdata = modeldata[-1]  # modelsum

                # else:
                #    mdata = modeldata.values

                merror = 1.
                # if dataset.is_2d:
                #     if constraints:
                #
                #         # Case of SQ-DQ experiments
                #         if self.kind == 'SQ-DQ' and \
                #                         'max_connections' in constraints[0]:
                #             # check connectivity numbers
                #             nbconnections = {}
                #             for key in params.keys():
                #                 if 'pos1' in key:
                #                     connect = key[-2:]
                #                     key = 'ampl_line_' + connect  # get amplitude
                #                     ki = connect[0].upper()
                #                     if ki not in nbconnections.keys():
                #                         nbconnections[ki] = 0
                #                     if int(params[key]) > 0:
                #                         nbconnections[ki] += 1
                #             for k, v in nbconnections.iteritems():
                #                 if v > constraints[0]['max_connections']:
                #                     merror *= v * 10.

                diff = data - mdata
                chi2 += np.sum(diff**2) * merror
                som += np.sum(data[0]**2)

            chi2 = np.sqrt(chi2 / som)
            return chi2

        # end chi2 function ---------------------------------------------------

        # callback function--------------------------------------------------------
        def callback(*args, **kwargs):
            """
            callback log.info function
            """
            global niter, chi2, everyiter, ncalls
            niter += 1

            if niter % everyiter != 0:
                return

            if not self.silent:
                display.clear_output(wait=True)
                info_(("Iterations: %d, Calls: %d (chi2: %.5f)" %
                       (niter, ncalls, chi2)))
                sys.stdout.flush()

        # end callback function ---------------------------------------------------

        fp = self.fp  # starting parameters

        dry = kwargs.get("dry", False)

        if not dry:
            fp, fopt = optimize(funchi2,
                                fp,
                                args=(self.datasets, ),
                                maxfun=maxfun,
                                maxiter=maxiter,
                                method=method,
                                constraints=kwargs.get('constraints', None),
                                callback=callback)

        # replace the previous script with new fp parameters
        self.parameterscript.script = str(fp)

        if not self.silent:
            # log.info the results
            info_("\n")
            info_('*' * 50)
            if not dry:
                info_("  Result:")
            else:
                info_("  Starting parameters:")
            info_('*' * 50)
            info_(self.parameterscript.script)

        # store the models
        for exp_idx, dataset in enumerate(self.datasets):
            dataset.modeldata, dataset.modelnames, dataset.model_A, dataset.model_a, dataset.model_b = \
                self._get_modeldata(dataset, exp_idx)

        # Reset Log_level
        if not self.silent:
            preferences.log_level = level

        return
Example #20
def download_nist_ir(CAS, index="all"):
    """
    Download IR spectra from the NIST webbook.

    Parameters
    ----------
    CAS : int or str
        The CAS registry number; can be given as "XXXX-XX-X" (str), "XXXXXXX" (str) or XXXXXXX (int).

    index : str or int or tuple of ints
        If set to 'all' (default), imports all available spectra for the compound
        corresponding to the CAS number; an int or a tuple of ints selects a single
        spectrum or several spectra.

    Returns
    -------
    list of NDDataset or NDDataset
        The dataset(s).

    See Also
    --------
    read : Read data from experimental data.
    """

    if isinstance(CAS, str) and "-" in CAS:
        CAS = CAS.replace("-", "")

    if index == "all":
        # test urls and return list if any...
        index = []
        i = 0
        while "continue":
            url = (
                f"https://webbook.nist.gov/cgi/cbook.cgi?JCAMP=C{CAS}&Index={i}&Type=IR"
            )
            try:
                response = requests.get(url, timeout=10)
                if b"Spectrum not found" in response.content[:30]:
                    break
                else:
                    index.append(i)
                    i += 1
            except OSError:
                error_("OSError: could not connect to NIST")
                return None

        if len(index) == 0:
            error_("NIST IR: no spectrum found")
            return
        elif len(index) == 1:
            info_("NIST IR: 1 spectrum found")
        else:
            info_("NISTR IR: {len(index)} spectra found")

    elif isinstance(index, int):
        index = [index]
    elif not is_iterable(index):
        raise ValueError("index must be 'all', int or iterable of int")

    out = []
    for i in index:
        # sample address (water, spectrum 1)
        # https://webbook.nist.gov/cgi/cbook.cgi?JCAMP=C7732185&Index=1&Type=IR
        url = f"https://webbook.nist.gov/cgi/cbook.cgi?JCAMP=C{CAS}&Index={i}&Type=IR"
        try:
            response = requests.get(url, stream=True, timeout=10)
            if b"Spectrum not found" in response.content[:30]:
                error_(
                    f"NIST IR: Spectrum {i} does not exist... please check!")
                if i == index[-1] and out == []:
                    return None
                else:
                    break

        except OSError:
            error_("OSError: Cannot connect... ")
            return None

        # Load data
        txtdata = ""
        for rd in response.iter_content():
            txtdata += rd.decode("utf8")

        with open("temp.jdx", "w") as f:
            f.write(txtdata)
        try:
            ds = read_jcamp("temp.jdx")

            # replace the default entry ":imported from jdx file":
            ds.history[0] = ds.history[0][:len(str(datetime.now(
                timezone.utc)))] + (f" : downloaded from NIST: {url}\n")
            out.append(ds)
            (Path(".") / "temp.jdx").unlink()

        except Exception:
            raise OSError(
                "Can't read this JCAMP file: please report the issue to Spectrochempy developers"
            )

    if len(out) == 1:
        return out[0]
    else:
        return out