def figures_of_merit(X, maxPIndex, C, St, j):
    """
    Evaluate the fit quality obtained once the j-th compound has been added.

    The function works in place on `C` and `St`: column ``j`` of `C` is
    overwritten with column ``maxPIndex[j]`` of `X`, then rows ``0..j`` of
    `St` are overwritten with the least-squares solution of
    ``C[:, 0:j+1] @ St = X``.

    Parameters
    ----------
    X : dataset
        Data to reproduce; must expose ``.data`` and 2D slicing.
    maxPIndex : sequence of int
        ``maxPIndex[j]`` is the column of `X` used as the j-th profile.
    C, St : dataset
        Concentration and spectra matrices, updated in place.
    j : int
        Index of the compound that has just been added.

    Returns
    -------
    rsquare
        ``1 - ||res||**2 / ||X||**2`` (a fraction, not a percentage).
    stdev_res
        ``np.std`` of the residuals ``Xhat - X``.
    """
    n_used = j + 1

    # plug the selected column of X in as the j-th concentration profile
    C[:, j] = X[:, maxPIndex[j]]

    # least-squares spectra for the compounds included so far
    solution = np.linalg.lstsq(C.data[:, 0:n_used], X.data, rcond=None)
    St[0:n_used, :] = solution[0]

    residuals = dot(C[:, 0:n_used], St[0:n_used, :]) - X
    stdev_res = np.std(residuals)

    ss_res = np.linalg.norm(residuals) ** 2
    ss_tot = np.linalg.norm(X) ** 2
    return 1 - ss_res / ss_tot, stdev_res
def test_npy(ds1):
    """
    Check that ``diag`` and ``dot`` keep (or combine) units on the ``ds1`` fixture.
    """
    # ------------------------------------------------------------------ diag
    # diag works only for 1D or 2D datasets: the full fixture must be rejected
    with pytest.raises(ValueError):
        diag(ds1)

    # 2D input -> 1D output holding the diagonal
    plane = ds1[0].squeeze()
    assert plane.ndim == 2
    extracted = diag(plane)
    assert extracted.units == ds1.units
    assert extracted.ndim == 1
    assert extracted.size == plane.x.size

    # 1D input -> 2D output
    row = plane[0].squeeze()
    assert row.ndim == 1
    built = diag(row)
    assert built.units == ds1.units
    assert built.ndim == 2
    assert built.size == row.x.size**2

    # a bare array input still yields an NDDataset
    from_array = diag(plane.data)
    assert from_array.implements("NDDataset")

    # ------------------------------------------------------------------- dot
    left = plane  # first 2D dataset
    right = ds1[3].squeeze()  # second 2D dataset
    right.ito("km", force=True)  # give `right` some units

    product = dot(left.T, right)
    assert product.units == left.units * right.units
    assert product.shape == (left.x.size, right.x.size)

    # mixing a dataset with a numpy object is allowed
    product = dot(left.T, right.data)
    assert product.units == left.units

    # with no dataset at all, the result is a plain ndarray
    product = dot(left.data.T, right.data)
    assert isinstance(product, np.ndarray)
def _generate_2D_spectra(concentrations, spectra):
    """
    Combine concentration profiles and spectra into a fake 2D experiment.

    Parameters
    ----------
    concentrations : |NDDataset|
        Concentration profiles; the transpose is used as left operand.
    spectra : |NDDataset|
        Spectra used as right operand.

    Returns
    -------
    |NDDataset|
        ``dot(concentrations.T, spectra)``.
    """
    from spectrochempy.core.dataset.npy import dot

    transposed = concentrations.T
    return dot(transposed, spectra)
def generate_fake():
    """
    Generate a fake, noisy 2D experimental dataset.

    Four model peaks and four concentration profiles are built, multiplied
    together and perturbed with gaussian noise whose standard deviation is
    0.5% of the maximum of the noise-free data.

    Returns
    -------
    tuple
        ``(d, specs, concs)``: the noisy 2D dataset, the individual spectra
        and the concentration profiles.
    """
    from spectrochempy.core.dataset.npy import dot

    # one entry per peak (the first one is very broad)
    positions = (6000.0, 4000.0, 2000.0, 2500.0)
    widths = (6000.0, 1000.0, 250.0, 800.0)
    amplitudes = (100.0, 70.0, 10.0, 50.0)
    ratios = (0.1, 0.5, 0.2, 1.0)
    asymmetries = (0.0, 0.0, 0, 4)
    models = ("gaussian", "voigt", "voigt", "asymmetricvoigt")

    def C1(t):
        # linear evolution of the baseline
        return t * 0.05 + 0.01

    def C2(t):
        sigmoid = scp.sigmoidmodel()
        return sigmoid.f(t, 1.0, max(t) / 2.0, 1, 2)

    def C3(t):
        sigmoid = scp.sigmoidmodel()
        return sigmoid.f(t, 1.0, max(t) / 5.0, 1, -2)

    def C4(t):
        # whatever is left once C2 and C3 are accounted for
        return 1.0 - C2(t) - C3(t)

    specs = _make_spectra_matrix(
        models, amplitudes, positions, widths, ratios, asymmetries
    )
    concs = _make_concentrations_matrix(C1, C2, C3, C4)

    # noise-free 2D data, then additive gaussian noise
    noisy = dot(concs.T, specs)
    sigma = 0.005 * noisy.data.max()
    noisy.data = np.random.normal(noisy.data, sigma)

    return noisy, specs, concs
def reconstruct(self):
    """
    Rebuild the dataset from the SIMPLISMA concentration and spectra profiles.

    The product ``dot(self.C, self.St)`` is computed; ``self.logs`` is
    appended to the description of the result.

    Returns
    -------
    X_hat
        The reconstructed dataset based on the SIMPLISMA analysis.
    """
    conc, spectra = self.C, self.St
    rebuilt = dot(conc, spectra)

    rebuilt.description = "Dataset reconstructed by SIMPLISMA\n" + self.logs
    rebuilt.title = "X_hat: " + self.X.title
    return rebuilt
def reconstruct(self):
    """
    Rebuild the dataset from the optimized concentration and spectra profiles.

    The matrix product ``X_hat = C . St`` is computed from ``self.C`` and
    ``self.St``.

    Returns
    -------
    X_hat : |NDDataset|
        The reconstructed dataset based on the MCR-ALS optimization.
    """
    rebuilt = dot(self.C, self.St)

    rebuilt.history = "Dataset reconstructed by MCS ALS optimization"
    rebuilt.title = "X_hat: " + self.X.title
    return rebuilt
def reconstruct(self, n_pc=None):
    """
    Transform data back to the original space using a given number of PCs.

    The product ``S[:, :n_pc] . LT[:n_pc]`` is computed, then the
    pre-processing recorded on the instance is undone in reverse order:
    scaling, standardization, centering.

    Parameters
    ----------
    n_pc : int, optional
        The number of PCs to use; passed through ``self._get_n_pc`` to
        obtain the effective value.

    Returns
    -------
    X_reconstructed : |NDDataset|
        The reconstructed dataset based on n_pc principal components.
    """
    # effective number of components (automatic or taken from the argument)
    n_pc = self._get_n_pc(n_pc)

    scores = self._S[:, :n_pc]
    loadings_t = self._LT[:n_pc]
    rebuilt = dot(scores, loadings_t)

    # undo scaling to [0, 1]
    if self._scaled:
        rebuilt *= self._ampl
        rebuilt += self._min

    # undo standardization
    if self._standardized:
        rebuilt *= self._std

    # undo mean centering
    if self._centered:
        rebuilt += self._center

    rebuilt.history = f'PCA reconstructed Dataset with {n_pc} principal components'
    rebuilt.title = self._X.title
    return rebuilt
def __init__(self, dataset, centered=True, standardized=False, scaled=False):
    """
    Pre-process `dataset`, run an SVD on it and store scores and loadings.

    Parameters
    ----------
    dataset : |NDDataset| object
        The input dataset has shape (M, N). M is the number of observations
        (for example a series of IR spectra) while N is the number of
        features (for example the wavenumbers measured in each IR spectrum).
    centered : bool, optional, default:True
        If True the data are centered around the mean values along axis 0.
    standardized : bool, optional, default:False
        If True the data are divided by their standard deviation along
        axis 0.
    scaled : bool, optional, default:False
        If True the data are scaled in the interval [0-1]:
        ``X' = (X - min(X)) / (max(X) - min(X))`` along axis 0.
    """
    self.prefs = dataset.preferences
    self._X = dataset
    work = dataset.copy()

    # --- 1) mean centering ---------------------------------------------------
    self._centered = centered
    if centered:
        column_means = np.mean(dataset, axis=0)
        self._center = column_means
        work = dataset - column_means
        work.title = "centered %s" % dataset.title

    # --- 2) standardization --------------------------------------------------
    self._standardized = standardized
    if standardized:
        self._std = np.std(work, axis=0)
        work /= self._std
        work.title = "standardized %s" % work.title

    # --- 3) scaling to [0, 1] ------------------------------------------------
    self._scaled = scaled
    if scaled:
        self._min = np.min(work, axis=0)
        self._ampl = np.ptp(work, axis=0)
        work -= self._min
        work /= self._ampl
        work.title = "scaled %s" % work.title

    self._Xscaled = work

    # --- SVD of the pre-processed data ---------------------------------------
    svd = SVD(work)
    sigma = svd.s.diag()
    left_vectors = svd.U

    # loadings: the VT matrix of the SVD, relabelled
    loadings_t = svd.VT
    loadings_t.title = 'loadings (L^T) of ' + dataset.name
    loadings_t.history = 'Created by PCA'

    # scores: U . diag(s), with one labelled coordinate per component
    scores = dot(left_vectors, sigma)
    scores.title = 'scores (S) of ' + dataset.name
    pc_labels = ['#%d' % (i + 1) for i in range(svd.s.size)]
    scores.set_coordset(
        y=dataset.y,
        x=Coord(None, labels=pc_labels, title='principal component'),
    )
    scores.description = 'scores (S) of ' + dataset.name
    scores.history = 'Created by PCA'

    self._LT = loadings_t
    self._S = scores

    # --- quantities copied from the SVD object -------------------------------
    for attr in ('_sv', '_ev', '_ev_ratio', '_ev_cum'):
        setattr(self, attr, getattr(svd, attr[1:]))
        getattr(self, attr).x.title = 'PC #'
def __init__(self, dataset, guess, **kwargs):
    """
    Run the MCR-ALS optimization of `dataset` starting from `guess`.

    Parameters
    ----------
    dataset : NDDataset
        The 2D data ``X`` to decompose as ``C . St``.
    guess : NDDataset or numpy.ndarray
        Initial profiles. If its first dimension equals ``X.shape[0]`` it is
        used as the initial concentrations; otherwise, if its second
        dimension equals ``X.shape[1]``, as the initial spectra.
    **kwargs
        Optimization parameters, see Other Parameters.

    Other Parameters
    ----------------
    tol : float, default 0.1
        The loop stops when the percent change of the standard deviation
        of the residuals falls below this value.
    maxit : int, default 50
        Maximum number of iterations.
    maxdiv : int, default 5
        Maximum number of successive iterations with increasing residuals.
    nonnegConc, unimodConc, nonnegSpec : "all", None or sequence of int
        Species indexes subject to the constraint. Default "all" (every
        species); None means no species.
    unimodSpec : "all", None or sequence of int, default None
        Species indexes whose spectra are forced to be unimodal.
    unimodConcTol, unimodSpecTol : float, default 1.1
        Tolerance forwarded to ``_unimodal_2D``.
    unimodConcMod, unimodSpecMod : str, default "strict"
        Mode forwarded to ``_unimodal_2D``.
    monoIncConc, monoDecConc : sequence of int or None, default None
        Species whose concentration is forced to increase / decrease.
    monoIncTol, monoDecTol : float, default 1.1
        Tolerance of the monotonicity constraints.
    closureConc : sequence of int or None, default None
        Species subject to the closure constraint.
    closureTarget : "default" or array of length ``X.shape[0]``
        Closure target; "default" means an array of ones.
    closureMethod : {"scaling", "constantSum"}, default "scaling"
        How the closure is enforced.
    hardConc : sequence of int or None, default None
        Columns of C replaced at each iteration by externally computed
        profiles.
    getConc : callable
        Called as ``getConc(*argsGetConc)``. Returns either the profiles,
        or a dict with keys ``"concentrations"`` and ``"new_args"`` (the
        latter replaces `argsGetConc` for the next call).
    argsGetConc : sequence
        Positional arguments for `getConc`.
    hardC_to_C_idx : "default" or sequence of int
        Columns of the `getConc` output to copy; "default" means
        ``np.arange(nspecies)``.
    normSpec : {None, "max", "euclid"}, default None
        Normalization applied to the spectra at each iteration.
    unimodTol, unimodMod, verbose
        Deprecated; a ``DeprecationWarning`` is emitted when they are used.

    Raises
    ------
    ValueError
        If `guess` does not match `dataset`, or if an index list or the
        closure target is inconsistent with the number of species or
        observations.
    """

    def _is_keyword(value, keyword):
        # Only genuine strings are compared with the keyword: ``==`` between
        # an ndarray and a str or a list is elementwise (or an error) in
        # NumPy, which made the former ``x == "all"`` / ``x != []`` tests
        # fail for array input.
        return isinstance(value, str) and value == keyword

    # list all default arguments:
    tol = kwargs.get("tol", 0.1)
    maxit = kwargs.get("maxit", 50)
    maxdiv = kwargs.get("maxdiv", 5)

    nonnegConc = kwargs.get("nonnegConc", "all")

    unimodConc = kwargs.get("unimodConc", "all")
    unimodConcTol = kwargs.get("unimodConcTol", 1.1)
    # read the documented key (it used to be read from the deprecated one)
    unimodConcMod = kwargs.get("unimodConcMod", "strict")
    if "unimodTol" in kwargs:
        warnings.warn("unimodTol deprecated, use unimodConcTol instead",
                      DeprecationWarning)
        unimodConcTol = kwargs.get("unimodTol", 1.1)
    if "unimodMod" in kwargs:
        warnings.warn("unimodMod deprecated, use unimodConcMod instead",
                      DeprecationWarning)
        # the deprecated alias is only a fallback for the new key
        unimodConcMod = kwargs.get("unimodConcMod", kwargs["unimodMod"])

    monoDecConc = kwargs.get("monoDecConc", None)
    monoIncTol = kwargs.get("monoIncTol", 1.1)
    monoIncConc = kwargs.get("monoIncConc", None)
    monoDecTol = kwargs.get("monoDecTol", 1.1)

    closureConc = kwargs.get("closureConc", None)
    closureTarget = kwargs.get("closureTarget", "default")
    closureMethod = kwargs.get("closureMethod", "scaling")

    hardConc = kwargs.get("hardConc", None)
    getConc = kwargs.get("getConc", None)
    argsGetConc = kwargs.get("argsGetConc", None)
    hardC_to_C_idx = kwargs.get("hardC_to_C_idx", "default")

    unimodSpec = kwargs.get("unimodSpec", None)
    unimodSpecTol = kwargs.get("unimodSpecTol", 1.1)
    unimodSpecMod = kwargs.get("unimodSpecMod", "strict")

    nonnegSpec = kwargs.get("nonnegSpec", "all")
    normSpec = kwargs.get("normSpec", None)

    if "verbose" in kwargs:
        warnings.warn(
            "verbose deprecated. Instead, use set_loglevel(INFO) before launching MCRALS",
            DeprecationWarning,
        )
        # honour the value: verbose=False must not raise the log level
        if kwargs["verbose"]:
            set_loglevel(INFO)

    # Check initial data
    # ------------------------------------------------------------------------
    initConc, initSpec = False, False

    if type(guess) is np.ndarray:
        guess = NDDataset(guess)

    X = dataset

    if X.shape[0] == guess.shape[0]:
        initConc = True
        C = guess.copy()
        C.name = "Pure conc. profile, mcs-als of " + X.name
        nspecies = C.shape[1]

    elif X.shape[1] == guess.shape[1]:
        initSpec = True
        St = guess.copy()
        St.name = "Pure spectra profile, mcs-als of " + X.name
        nspecies = St.shape[0]

    else:
        raise ValueError("the dimensions of guess do not match the data")

    ny, _ = X.shape

    # makes a PCA with same number of species for further comparison
    Xpca = PCA(X).reconstruct(n_pc=nspecies)

    # reset default text to indexes
    # ------------------------------
    def _species_indexes(idx, argname):
        # Turn "all" / None / explicit indexes into a usable index sequence
        # and check explicit indexes against the number of species.
        if _is_keyword(idx, "all"):
            return np.arange(nspecies)
        if idx is None:
            return []
        if len(idx) > 0 and (len(idx) > nspecies or max(idx) + 1 > nspecies):
            raise ValueError(
                f"The guess has only {nspecies} species, please check {argname}"
            )
        return idx

    nonnegConc = _species_indexes(nonnegConc, "nonnegConc")
    unimodConc = _species_indexes(unimodConc, "unimodConc")

    if _is_keyword(closureTarget, "default"):
        closureTarget = np.ones(ny)
    elif len(closureTarget) != ny:
        raise ValueError(
            f"The data contain only {ny} observations, please check closureTarget"
        )

    if _is_keyword(hardC_to_C_idx, "default"):
        hardC_to_C_idx = np.arange(nspecies)
    elif len(hardC_to_C_idx) > nspecies or max(hardC_to_C_idx) + 1 > nspecies:
        raise ValueError(
            f"The guess has only {nspecies} species, please check hardC_to_C_idx"
        )

    # constraints on spectra
    unimodSpec = _species_indexes(unimodSpec, "unimodSpec")
    nonnegSpec = _species_indexes(nonnegSpec, "nonnegSpec")

    # Compute initial spectra or concentrations (first iteration...)
    # ------------------------------------------------------------------------
    if initConc:
        if C.coordset is None:
            C.set_coordset(y=X.y, x=C.x)
        St = NDDataset(np.linalg.lstsq(C.data, X.data, rcond=None)[0])
        St.name = "Pure spectra profile, mcs-als of " + X.name
        St.title = X.title
        cy = C.x.copy() if C.x else None
        cx = X.x.copy() if X.x else None
        St.set_coordset(y=cy, x=cx)

    if initSpec:
        if St.coordset is None:
            St.set_coordset(y=St.y, x=X.x)
        Ct = np.linalg.lstsq(St.data.T, X.data.T, rcond=None)[0]
        C = NDDataset(Ct.T)
        C.name = "Pure conc. profile, mcs-als of " + X.name
        C.title = "concentration"
        cx = St.y.copy() if St.y else None
        cy = X.y.copy() if X.y else None
        C.set_coordset(y=cy, x=cx)

    change = tol + 1
    stdev = X.std()
    niter = 0
    ndiv = 0

    log = "*** ALS optimisation log***\n"
    log += "#iter Error/PCA Error/Exp %change \n"
    log += "------------------------------------------------- \n"
    info_(log)

    while change >= tol and niter < maxit and ndiv < maxdiv:

        C.data = np.linalg.lstsq(St.data.T, X.data.T, rcond=None)[0].T
        niter += 1

        # Force non-negative concentration
        # --------------------------------
        if nonnegConc is not None:
            for s in nonnegConc:
                C.data[:, s] = C.data[:, s].clip(min=0)

        # Force unimodal concentration
        # ----------------------------
        if len(unimodConc) > 0:
            C.data = _unimodal_2D(
                C.data,
                idxes=unimodConc,
                axis=0,
                tol=unimodConcTol,
                mod=unimodConcMod,
            )

        # Force monotonic increase
        # ------------------------
        if monoIncConc is not None:
            for s in monoIncConc:
                for curid in np.arange(ny - 1):
                    if C.data[curid + 1, s] < C.data[curid, s] / monoIncTol:
                        C.data[curid + 1, s] = C.data[curid, s]

        # Force monotonic decrease
        # ----------------------------------------------
        if monoDecConc is not None:
            for s in monoDecConc:
                for curid in np.arange(ny - 1):
                    if C.data[curid + 1, s] > C.data[curid, s] * monoDecTol:
                        C.data[curid + 1, s] = C.data[curid, s]

        # Closure
        # ------------------------------------------
        if closureConc is not None:
            if closureMethod == "scaling":
                Q = np.linalg.lstsq(C.data[:, closureConc], closureTarget.T,
                                    rcond=None)[0]
                C.data[:, closureConc] = np.dot(C.data[:, closureConc],
                                                np.diag(Q))
            elif closureMethod == "constantSum":
                totalConc = np.sum(C.data[:, closureConc], axis=1)
                C.data[:, closureConc] = (C.data[:, closureConc]
                                          * closureTarget[:, None]
                                          / totalConc[:, None])

        # external concentration profiles
        # ------------------------------------------
        if hardConc is not None:
            extOutput = getConc(*argsGetConc)
            if isinstance(extOutput, dict):
                fixedC = extOutput["concentrations"]
                argsGetConc = extOutput["new_args"]
            else:
                fixedC = extOutput

            C.data[:, hardConc] = fixedC[:, hardC_to_C_idx]

        # stores C in C_hard
        Chard = C.copy()

        # compute St
        St.data = np.linalg.lstsq(C.data, X.data, rcond=None)[0]

        # stores St in Stsoft
        Stsoft = St.copy()

        # Force non-negative spectra
        # --------------------------
        if nonnegSpec is not None:
            St.data[nonnegSpec, :] = St.data[nonnegSpec, :].clip(min=0)

        # Force unimodal spectra
        # ----------------------------
        if len(unimodSpec) > 0:
            St.data = _unimodal_2D(
                St.data,
                idxes=unimodSpec,
                axis=1,
                tol=unimodSpecTol,
                mod=unimodSpecMod,
            )

        # recompute C for consistency(soft modeling)
        # rcond=None as in every other lstsq call of this method
        C.data = np.linalg.lstsq(St.data.T, X.data.T, rcond=None)[0].T

        # rescale spectra & concentrations
        if normSpec == "max":
            alpha = np.max(St.data, axis=1).reshape(nspecies, 1)
            St.data = St.data / alpha
            C.data = C.data * alpha.T
        elif normSpec == "euclid":
            alpha = np.linalg.norm(St.data, axis=1).reshape(nspecies, 1)
            St.data = St.data / alpha
            C.data = C.data * alpha.T

        # compute residuals
        # -----------------
        X_hat = dot(C, St)
        stdev2 = (X_hat - X.data).std()
        change = 100 * (stdev2 - stdev) / stdev
        stdev = stdev2

        stdev_PCA = (X_hat - Xpca.data).std()

        logentry = "{:3d} {:10f} {:10f} {:10f}".format(
            niter, stdev_PCA, stdev2, change)
        log += logentry + "\n"
        info_(logentry)

        # a positive change means the residuals increased at this iteration
        if change > 0:
            ndiv += 1
        else:
            ndiv = 0
            change = -change

        if change < tol:
            logentry = "converged !"
            log += logentry + "\n"
            info_(logentry)

        if ndiv == maxdiv:
            logline = (
                f"Optimization not improved since {maxdiv} iterations... unconverged "
                f"or 'tol' set too small ?\n")
            logline += "Stop ALS optimization"
            log += logline + "\n"
            info_(logline)

        if niter == maxit:
            logline = "Convergence criterion ('tol') not reached after {:d} iterations.".format(
                maxit)
            logline += "Stop ALS optimization"
            log += logline + "\n"
            info_(logline)

    self._X = X
    self._params = {
        "tol": tol,
        "maxit": maxit,
        "maxdiv": maxdiv,
        "nonnegConc": nonnegConc,
        "unimodConc": unimodConc,
        "unimodConcTol": unimodConcTol,
        "unimodConcMod": unimodConcMod,
        "closureConc": closureConc,
        "closureTarget": closureTarget,
        "closureMethod": closureMethod,
        "monoDecConc": monoDecConc,
        "monoDecTol": monoDecTol,
        "monoIncConc": monoIncConc,
        "monoIncTol": monoIncTol,
        "hardConc": hardConc,
        "getConc": getConc,
        "argsGetConc": argsGetConc,
        "hardC_to_C_idx": hardC_to_C_idx,
        "nonnegSpec": nonnegSpec,
        "unimodSpec": unimodSpec,
        "unimodSpecTol": unimodSpecTol,
        "unimodSpecMod": unimodSpecMod,
        "normSpec": normSpec,
    }

    self._C = C
    if hardConc is not None:
        self._fixedC = fixedC
        self._extOutput = extOutput
    else:
        self._fixedC = None
        self._extOutput = None

    self._St = St
    self._log = log

    self._Stsoft = Stsoft
    self._Chard = Chard
def __init__(self, dataset, centered=True, standardized=False, scaled=False):
    """
    Pre-process `dataset`, run an SVD on it and store scores and loadings.

    Parameters
    ----------
    dataset : dataset object
        The data to analyse; its ``preferences``, ``name`` and ``y``
        attributes are used.
    centered : bool, optional, default:True
        If True the mean along dim 0 is subtracted.
    standardized : bool, optional, default:False
        If True the data are divided by their standard deviation along
        dim 0.
    scaled : bool, optional, default:False
        If True the minimum along dim 0 is subtracted and the result is
        divided by the peak-to-peak amplitude along dim 0.
    """
    super().__init__()

    self.prefs = dataset.preferences
    self._X = X = dataset
    Xsc = X.copy()

    # --- 1) mean centering ---------------------------------------------------
    self._centered = centered
    if centered:
        column_means = X.mean(dim=0)
        self._center = column_means
        Xsc = X - column_means
        Xsc.name = f"centered {X.name}"

    # --- 2) standardization --------------------------------------------------
    self._standardized = standardized
    if standardized:
        self._std = Xsc.std(dim=0)
        Xsc /= self._std
        Xsc.name = f"standardized {Xsc.name}"

    # --- 3) scaling to [0, 1] ------------------------------------------------
    self._scaled = scaled
    if scaled:
        self._min = Xsc.min(dim=0)
        self._ampl = Xsc.ptp(dim=0)
        Xsc -= self._min
        Xsc /= self._ampl
        Xsc.name = "scaled %s" % Xsc.name

    self._Xscaled = Xsc

    # --- SVD of the pre-processed data ---------------------------------------
    svd = SVD(Xsc)
    sigma = svd.s.diag()
    left_vectors = svd.U

    # loadings: the VT matrix of the SVD, relabelled
    loadings_t = svd.VT
    loadings_t.title = "loadings (L^T) of " + X.name
    loadings_t.history = "Created by PCA"

    # scores: U . diag(s), with one labelled coordinate per component
    scores = dot(left_vectors, sigma)
    scores.title = "scores (S) of " + X.name
    pc_labels = ["#%d" % (i + 1) for i in range(svd.s.size)]
    scores.set_coordset(
        y=X.y,
        x=Coord(None, labels=pc_labels, title="principal component"),
    )
    scores.description = "scores (S) of " + X.name
    scores.history = "Created by PCA"

    self._LT = loadings_t
    self._S = scores

    # --- quantities copied from the SVD object -------------------------------
    for attr in ("_sv", "_ev", "_ev_ratio", "_ev_cum"):
        setattr(self, attr, getattr(svd, attr[1:]))
        getattr(self, attr).x.title = "PC #"
def __init__(self, dataset, guess, **kwargs):  # lgtm [py/missing-call-to-init]
    """
    Run the MCR-ALS optimization of `dataset` starting from `guess`.

    Parameters
    ----------
    dataset : |NDDataset|
        The dataset on which to perform the MCR-ALS analysis.
    guess : |NDDataset| or numpy.ndarray
        Initial concentrations (first dimension equal to
        ``dataset.shape[0]``) or initial spectra (second dimension equal to
        ``dataset.shape[1]``).
    **kwargs : dict
        Optimization parameters : See Other Parameters.

    Other Parameters
    ----------------
    verbose : bool, optional, default=False
        If True, the log level is set to INFO so that the summary of
        residuals is printed at each iteration. In any case the same
        information is stored in the logs.
    tol : float, optional, default=0.1
        Convergence criterion on the change of residuals (percent change
        of the standard deviation of residuals).
    maxit : int, optional, default=50
        Maximum number of ALS minimizations.
    maxdiv : int, optional, default=5
        Maximum number of successive non-converging iterations.
    nonnegConc : list or tuple, default=np.arange(nspecies)
        Index of species having non-negative concentration profiles. For
        instance [0, 2] indicates that species #0 and #2 have non-negative
        conc profiles while species #1 can have negative concentrations.
    unimodConc : list or tuple, default=np.arange(nspecies)
        Index of species having unimodal concentration profiles.
    unimodTol : float, default=1.1
        Tolerance of the unimodality constraint.
    unimodMod : {'strict', 'smooth'}, default='strict'
        How a point violating unimodality is corrected.
    monoIncConc, monoDecConc : list or tuple, default=None
        Index of species with monotonically increasing / decreasing
        concentration profiles.
    monoIncTol, monoDecTol : float, default=1.1
        Tolerance of the monotonicity constraints.
    closureConc : list or tuple, default=None (no closure)
        Index of species subjected to a closure constraint.
    closureTarget : array, default=np.ones(ny)
        Closure target, only read when `closureConc` is given.
    closureMethod : {'scaling', 'constantSum'}, default='scaling'
        Closure method, only read when `closureConc` is given.
    externalConc : list or tuple, default=None (no external concentration)
        Index of species for which a concentration profile is provided by
        an external function.
    getExternalConc : callable
        Required when `externalConc` is given. Called as
        ``getExternalConc(C, externalConc, external_to_C_idx, *args)``
        where C is the current concentration matrix. It returns either the
        external profiles (ndarray or NDDataset), or a dict with keys
        ``'concentrations'`` and ``'new_args'`` (the latter replaces
        `args` for the next call).
    args : tuple, optional, default=()
        Extra arguments passed to the external function.
    external_to_C_idx : array or tuple, default=externalConc
        Columns of the external profiles copied into the `externalConc`
        columns of C.
    nonnegSpec : list or tuple, default=np.arange(nspecies)
        Index of species having non-negative spectra.
    normSpec : {None, 'max', 'euclid'}, default=None
        Normalization applied to the spectra at each iteration.

    Raises
    ------
    ValueError
        If `guess` does not match `dataset`, or if `externalConc` is given
        without a callable `getExternalConc`.
    """
    verbose = kwargs.pop('verbose', False)
    if verbose:
        set_loglevel(INFO)

    # Check initial data
    # ------------------------------------------------------------------------
    initConc, initSpec = False, False

    if type(guess) is np.ndarray:
        guess = NDDataset(guess)

    X = dataset

    if X.shape[0] == guess.shape[0]:
        initConc = True
        C = guess.copy()
        C.name = 'Pure conc. profile, mcs-als of ' + X.name
        nspecies = C.shape[1]

    elif X.shape[1] == guess.shape[1]:
        initSpec = True
        St = guess.copy()
        St.name = 'Pure spectra profile, mcs-als of ' + X.name
        nspecies = St.shape[0]

    else:
        raise ValueError('the dimensions of initial concentration '
                         'or spectra dataset do not match the data')

    ny, _ = X.shape

    # makes a PCA with same number of species
    Xpca = PCA(X).reconstruct(n_pc=nspecies)

    # Get optional parameters in kwargs or set them to their default
    # ------------------------------------------------------------------------
    # TODO: make a preference file to set this kwargs

    # optimization
    tol = kwargs.get('tol', 0.1)
    maxit = kwargs.get('maxit', 50)
    maxdiv = kwargs.get('maxdiv', 5)

    # constraints on concentrations
    nonnegConc = kwargs.get('nonnegConc', np.arange(nspecies))
    unimodConc = kwargs.get('unimodConc', np.arange(nspecies))
    unimodTol = kwargs.get('unimodTol', 1.1)
    unimodMod = kwargs.get('unimodMod', 'strict')

    closureConc = kwargs.get('closureConc', None)
    if closureConc is not None:
        closureTarget = kwargs.get('closureTarget', np.ones(ny))
        closureMethod = kwargs.get('closureMethod', 'scaling')

    monoDecConc = kwargs.get('monoDecConc', None)
    monoDecTol = kwargs.get('monoDecTol', 1.1)
    monoIncConc = kwargs.get('monoIncConc', None)
    monoIncTol = kwargs.get('monoIncTol', 1.1)

    externalConc = kwargs.get('externalConc', None)
    if externalConc is not None:
        # dict.get never raises, so the missing function is detected
        # explicitly instead of failing later with "NoneType is not callable"
        getExternalConc = kwargs.get('getExternalConc')
        if not callable(getExternalConc):
            raise ValueError('A function must be given to get the external concentration profile(s)')
        # NOTE: an earlier default of np.arange(nspecies) was always
        # overwritten by this one; only the effective default is kept.
        external_to_C_idx = kwargs.get('external_to_C_idx', externalConc)
        args = kwargs.get('args', ())

    # constraints on spectra
    nonnegSpec = kwargs.get('nonnegSpec', np.arange(nspecies))
    normSpec = kwargs.get('normSpec', None)

    # TODO: add unimodal constraint on spectra

    # Compute initial spectra or concentrations (first iteration...)
    # ------------------------------------------------------------------------
    if initConc:
        if C.coordset is None:
            C.set_coordset(y=X.y, x=C.x)
        St = NDDataset(np.linalg.lstsq(C.data, X.data, rcond=None)[0])
        St.name = 'Pure spectra profile, mcs-als of ' + X.name
        St.title = X.title
        cy = C.x.copy() if C.x else None
        cx = X.x.copy() if X.x else None
        St.set_coordset(y=cy, x=cx)

    if initSpec:
        if St.coordset is None:
            St.set_coordset(y=St.y, x=X.x)
        Ct = np.linalg.lstsq(St.data.T, X.data.T, rcond=None)[0]
        C = NDDataset(Ct.T)
        C.name = 'Pure conc. profile, mcs-als of ' + X.name
        C.title = 'concentration'
        cx = St.y.copy() if St.y else None
        cy = X.y.copy() if X.y else None
        C.set_coordset(y=cy, x=cx)

    change = tol + 1
    stdev = X.std()
    niter = 0
    ndiv = 0

    logs = '*** ALS optimisation log***\n'
    logs += '#iter Error/PCA Error/Exp %change\n'
    logs += '---------------------------------------------------'
    info_(logs)
    # terminate the header line in the stored log, so that the first
    # iteration entry is not glued to the dashes
    logs += '\n'

    while change >= tol and niter < maxit and ndiv < maxdiv:

        C.data = np.linalg.lstsq(St.data.T, X.data.T, rcond=None)[0].T
        niter += 1

        # Force non-negative concentration
        # --------------------------------
        if nonnegConc is not None:
            for s in nonnegConc:
                C.data[:, s] = C.data[:, s].clip(min=0)

        # Force unimodal concentration
        # ----------------------------
        if unimodConc is not None:
            for s in unimodConc:
                maxid = np.argmax(C.data[:, s])
                curmax = C.data[maxid, s]
                curid = maxid

                # backward sweep, from the peak towards the first point
                while curid > 0:
                    curid -= 1
                    if C.data[curid, s] > curmax * unimodTol:
                        if unimodMod == 'strict':
                            C.data[curid, s] = C.data[curid + 1, s]
                        if unimodMod == 'smooth':
                            C.data[curid, s] = (C.data[curid, s]
                                                + C.data[curid + 1, s]) / 2
                            C.data[curid + 1, s] = C.data[curid, s]
                            curid = curid + 2
                    curmax = C.data[curid, s]

                # forward sweep, from the peak towards the last point.
                # The reference value restarts from the peak: without this
                # reset it would still hold the last value seen by the
                # backward sweep.
                curid = maxid
                curmax = C.data[maxid, s]
                while curid < ny - 1:
                    curid += 1
                    if C.data[curid, s] > curmax * unimodTol:
                        if unimodMod == 'strict':
                            C.data[curid, s] = C.data[curid - 1, s]
                        if unimodMod == 'smooth':
                            C.data[curid, s] = (C.data[curid, s]
                                                + C.data[curid - 1, s]) / 2
                            C.data[curid - 1, s] = C.data[curid, s]
                            curid = curid - 2
                    curmax = C.data[curid, s]

        # Force monotonic increase
        # ------------------------
        if monoIncConc is not None:
            for s in monoIncConc:
                for curid in np.arange(ny - 1):
                    if C.data[curid + 1, s] < C.data[curid, s] / monoIncTol:
                        C.data[curid + 1, s] = C.data[curid, s]

        # Force monotonic decrease
        # ----------------------------------------------
        if monoDecConc is not None:
            for s in monoDecConc:
                for curid in np.arange(ny - 1):
                    if C.data[curid + 1, s] > C.data[curid, s] * monoDecTol:
                        C.data[curid + 1, s] = C.data[curid, s]

        # Closure
        # ------------------------------------------
        if closureConc is not None:
            if closureMethod == 'scaling':
                Q = np.linalg.lstsq(C.data[:, closureConc], closureTarget.T,
                                    rcond=None)[0]
                C.data[:, closureConc] = np.dot(C.data[:, closureConc],
                                                np.diag(Q))
            elif closureMethod == 'constantSum':
                totalConc = np.sum(C.data[:, closureConc], axis=1)
                C.data[:, closureConc] = (C.data[:, closureConc]
                                          * closureTarget[:, None]
                                          / totalConc[:, None])

        # external concentration profiles
        # ------------------------------------------
        if externalConc is not None:
            extOutput = getExternalConc(C, externalConc, external_to_C_idx,
                                        *args)
            if isinstance(extOutput, dict):
                extC = extOutput['concentrations']
                args = extOutput['new_args']
            else:
                extC = extOutput
            if isinstance(extC, NDDataset):
                extC = extC.data
            C.data[:, externalConc] = extC[:, external_to_C_idx]

        # stores C in C_hard
        Chard = C.copy()

        # compute St
        St.data = np.linalg.lstsq(C.data, X.data, rcond=None)[0]

        # stores St in Stsoft
        Stsoft = St.copy()

        # Force non-negative spectra
        # --------------------------
        if nonnegSpec is not None:
            St.data[nonnegSpec, :] = St.data[nonnegSpec, :].clip(min=0)

        # recompute C for consistency(soft modeling)
        C.data = np.linalg.lstsq(St.data.T, X.data.T, rcond=None)[0].T

        # rescale spectra & concentrations
        if normSpec == 'max':
            alpha = np.max(St.data, axis=1).reshape(nspecies, 1)
            St.data = St.data / alpha
            C.data = C.data * alpha.T
        elif normSpec == 'euclid':
            alpha = np.linalg.norm(St.data, axis=1).reshape(nspecies, 1)
            St.data = St.data / alpha
            C.data = C.data * alpha.T

        # compute residuals
        # -----------------
        X_hat = dot(C, St)
        stdev2 = (X_hat - X.data).std()
        change = 100 * (stdev2 - stdev) / stdev
        stdev = stdev2

        stdev_PCA = (X_hat - Xpca.data).std()  # TODO: Check PCA : values are different from the Arnaud version ?

        logentry = '{:3d} {:10f} {:10f} {:10f}'.format(niter, stdev_PCA, stdev2, change)
        logs += logentry + '\n'
        info_(logentry)

        # a positive change means the residuals increased at this iteration
        if change > 0:
            ndiv += 1
        else:
            ndiv = 0
            change = -change

        if change < tol:
            logentry = 'converged !'
            logs += logentry + '\n'
            info_(logentry)

        if ndiv == maxdiv:
            logline = f"Optimization not improved since {maxdiv} iterations... unconverged " \
                      f"or 'tol' set too small ?\n"
            logline += 'Stop ALS optimization'
            logs += logline + '\n'
            info_(logline)

        if niter == maxit:
            logline = 'Convergence criterion (\'tol\') not reached after {:d} iterations.'.format(maxit)
            logline += 'Stop ALS optimization'
            logs += logline + '\n'
            info_(logline)

    self._X = X
    self._params = kwargs

    self._C = C
    if externalConc is not None:
        self._extC = extC
        self._extOutput = extOutput
    else:
        self._extC = None
        self._extOutput = None

    self._St = St
    self._logs = logs

    self._Stsoft = Stsoft
    self._Chard = Chard