def _linearize(self):
    """
    Try to replace explicitly stored 1D data by a linear description.

    When the stored values are evenly spaced (relative variation of the
    successive increments below 1e-5), ``_offset``, ``_increment`` and
    ``_size`` are set, ``_data`` is released (set to None) and ``_linear``
    becomes True. In every other case ``_linear`` is left to False and
    ``_data`` is kept untouched.

    Nothing is done when the object is not flagged as linear or when it
    holds no data.
    """
    if not self.linear or self._data is None:
        return

    self._linear = False  # to avoid action of the observer

    if self._squeeze_ndim > 1:
        error_("Linearization is only implemented for 1D data")
        return

    data = self._data.squeeze()
    if data.size < 2:
        # a single point does not define an increment
        return

    # np.ptp() is used as a function: the ndarray.ptp() method does not
    # exist any more in NumPy >= 2.0
    span = np.ptp(data)
    if span == 0:
        # constant data: no increment can be defined (this also avoids a
        # 0/0 division below)
        return

    # try to find an increment
    inc = np.diff(data)
    variation = np.ptp(inc) / span
    if variation < 1.0e-5:
        self._increment = span / (data.size - 1) * np.sign(inc[0])
        self._offset = data[0]
        self._size = data.size
        self._data = None
        self._linear = True
def coord(self, dim="x"):
    """
    Return the coordinates along the given dimension.

    Parameters
    ----------
    dim : int or str
        A dimension index or name, default index = `x`.
        If an integer is provided, it is equivalent to the `axis` parameter
        for numpy array.

    Returns
    -------
    |Coord|
        Coordinates along the given axis, or None when the dimension index
        is None, when no coordset is defined, or when the coordset has no
        entry with the name of the dimension (an error is then reported).
    """
    # should generate an error if the dimension name is not recognized
    idx = self._get_dims_index(dim)[0]
    if idx is None or self._coordset is None:
        return None

    # idx is not necessarily the position of the coordinates in the CoordSet:
    # a transposition may have taken place, so the coordinates are retrieved
    # by name.
    name = self.dims[idx]
    names = self._coordset.names
    if name not in names:
        error_(f"could not find this dimension name: `{name}`")
        return None

    return self._coordset[names.index(name)]
def plot(self, **kwargs):
    """
    Generic plot function.

    This apply to a |NDDataset| but actually delegate the work to a plotter
    defined by the parameter ``method``.

    Parameters
    ----------
    **kwargs
        ``method`` (str, default ``'generic'``) selects the plotter: the
        attribute ``plot_<method>`` of this object is called, any ``+`` in
        the method name being replaced by ``_``. ``method=None`` is treated
        like the default. All other keyword arguments are passed to the
        plotter.

    Returns
    -------
    object
        Whatever the selected plotter returns.

    Raises
    ------
    IOError
        If no ``plot_<method>`` attribute exists.
    """
    # an explicit `method=None` falls back to the generic plotter instead of
    # failing on `None.replace`
    method = kwargs.pop('method', None) or 'generic'

    # Find the adequate plotter
    _plotter = getattr(self, f"plot_{method.replace('+', '_')}", None)
    if _plotter is None:
        # no plotter found
        message = 'The specified plotter for method `{}` was not found!'.format(method)
        error_(message)
        raise IOError(message)

    # Execute the plotter
    return _plotter(**kwargs)
def __getitem__(self, key):
    """
    Return the value associated with `key`.

    Lookup order: the parent set (attribute ``<name>_<key>``), then
    `plot_preferences`, then `preferences`, then an alias returned by
    ``_get_alias``, and finally the underlying mapping. An error is reported
    when that last lookup gives None.
    """
    # a set attached to a parent reads its values from that parent
    if self.parent is not None:
        return getattr(self.parent, f"{self.name}_{key}")

    # search on the preferences
    for source in (plot_preferences, preferences):
        if hasattr(source, key):
            return getattr(source, key)

    alias = self._get_alias(key)
    if not alias:
        value = super().__getitem__(key)
        if value is None:
            error_(f"not found {key}")
        return value

    if isinstance(alias, list):
        # a list of aliases describes a group of sub-keys: expose them as a
        # child set attached to this one
        members = {n: getattr(self, f"{key}_{n}") for n in alias}
        return PreferencesSet(parent=self, name=key, **members)

    return getattr(self, alias)
def __setitem__(self, key, value):
    """
    Store `value` under `key` and propagate it to the matching preference.

    Keys known by `plot_preferences` or `preferences` are written there and
    then stored in this mapping. Keys of this set, or keys resolved through
    ``_get_alias``, are written as ``<prefix>_<key>`` on `plot_preferences`
    and on the parent set only. An error is reported for unknown keys.
    """
    if hasattr(plot_preferences, key):
        # also change the corresponding preferences
        try:
            setattr(plot_preferences, key, value)
        except TraitError:
            # retry after casting the value to the type of the trait default
            caster = type(plot_preferences.traits()[key].default_value)
            value = caster(value)
            setattr(plot_preferences, key, value)

    elif hasattr(preferences, key):
        setattr(preferences, key, value)

    else:
        if key in self.keys():
            prefix = self.name
        else:
            # try to find an alias for matplotlib values
            prefix = self._get_alias(key)
            if not prefix:
                error_(f'not found {key}')
                return

        newkey = f'{prefix}_{key}'
        setattr(plot_preferences, newkey, value)
        self.parent[newkey] = value
        return

    super().__setitem__(key, value)
def execute(self, localvars=None):
    """
    Execute the content of the script.

    The content is prefixed with the spectrochempy imports and run with
    ``exec``. When a name is missing (NameError), the parent of the script is
    bound to that name and the script is run a second time.

    Parameters
    ----------
    localvars : dict, optional
        Namespace used as locals for the execution. When not given, the
        local namespace of this method is used.

    Raises
    ------
    NameError
        If the first execution fails with a NameError whose message does not
        contain a quoted name to substitute.
    """
    # NOTE(review): the content is executed with exec(); it must only come
    # from a trusted source.
    co = ('from spectrochempy import *\n'
          'import spectrochempy as scp\n' + self._content)
    code = compile(co, '<string>', 'exec')

    if localvars is None:
        # locals was not passed, try to avoid missing values for name
        # such as 'project', 'proj', 'newproj'...
        # other missing name if they correspond to the parent project
        # will be substituted later upon exception
        localvars = locals()

    try:
        exec(code, globals(), localvars)
        return
    except NameError as e:
        # most of the time, a script apply to a project
        # let's try to substitute the parent to the missing name
        found = re.search(r"'(\w+)'", str(e))
        if found is None:
            # nothing to substitute: let the original error propagate
            raise
        localvars[found.group(1)] = self.parent  # lgtm [py/modification-of-locals]
        # TODO: check if this a real error or not (need to come
        #  back on this later)

    try:
        exec(code, globals(), localvars)
    except NameError as e:
        # the exception must be converted to text before being concatenated
        error_(f'{e}. pass the variable `locals()` : this may solve this problem! ')
def __setitem__(self, items, value):
    """
    Set `value` at `items`, unless the array is linearly defined.

    Linearly defined arrays are read-only: an error is reported and nothing
    is modified.
    """
    if not self.linear:
        super().__setitem__(items, value)
        return

    error_("Linearly defined array are readonly")
def makestyle(self, filename='mydefault', to_mpl=False):
    """
    Write the current matplotlib ``rcParams`` to a style sheet.

    Parameters
    ----------
    filename : str, optional, default: 'mydefault'
        Name of the style (the ``.mplstyle`` suffix is set automatically).
        Names starting with ``scpy`` are refused.
    to_mpl : bool, optional, default: False
        If True, the style sheet is also written in the ``stylelib``
        directory of the matplotlib configuration directory.

    Returns
    -------
    str or None
        The name of the style, which is also stored in ``self.style``, or
        None when the name is refused.
    """
    if filename.startswith('scpy'):
        error_('`scpy` is READ-ONLY. Please use an another style name.')
        return

    # rcParams that are not written in the style sheet
    skipped = {
        'animation.avconv_args', 'animation.avconv_path', 'animation.html_args',
        'keymap.all_axes', 'mathtext.fallback_to_cm', 'validate_bool_maybe_none',
        'savefig.jpeg_quality', 'text.latex.preview', 'backend',
        'backend_fallback', 'date.epoch', 'docstring.hardcopy',
        'figure.max_open_warning', 'figure.raise_window', 'interactive',
        'savefig.directory', 'timezone', 'tk.window_focus', 'toolbar',
        'webagg.address', 'webagg.open_in_browser', 'webagg.port',
        'webagg.port_retries',
    }

    chunks = []
    sline = ""
    for key in mpl.rcParams.keys():
        if key in skipped:
            continue

        val = str(mpl.rcParams[key])
        if val.startswith(('CapStyle', 'JoinStyle')):
            # keep only the part after the last dot (presumably enum members
            # printed as e.g. `CapStyle.butt`)
            val = val.split('.')[-1]

        # collapse runs of spaces into a single space
        while '  ' in val:
            val = val.replace('  ', ' ')

        line = f'{key:40s} : {val}\n'
        if line[0] != sline:
            # blank line between groups of keys sharing the same first letter
            chunks.append('\n')
            sline = line[0]
        if key not in ['axes.prop_cycle']:
            line = line.replace('[', '').replace(']', "").replace('\'', '').replace('"', '')
        if key == 'savefig.bbox':
            line = f'{key:40s} : standard\n'
        chunks.append(line.replace("#", ''))

    # Non matplotlib parameters,
    # some parameters are not saved in matplotlib style sheets so we will add them here
    nonmplpars = ['method_1D', 'method_2D', 'method_3D', 'colorbar', 'show_projections',
                  'show_projection_x', 'show_projection_y', 'colormap', 'max_lines_in_stack',
                  'simplify', 'number_of_x_labels', 'number_of_y_labels',
                  'number_of_z_labels', 'number_of_contours', 'contour_alpha',
                  'contour_start', 'antialiased', 'rcount', 'ccount']
    chunks.append('\n\n##\n## ADDITIONAL PARAMETERS FOR SPECTROCHEMPY\n##\n')
    chunks.extend(f"##@{par:37s} : {getattr(self, par)}\n" for par in nonmplpars)
    txt = "".join(chunks)

    stylesheet = (pathclean(self.stylesheets) / filename).with_suffix('.mplstyle')
    stylesheet.write_text(txt)

    if to_mpl:
        # make it also accessible to pyplot
        stylelib = (pathclean(mpl.get_configdir()) / 'stylelib' / filename).with_suffix('.mplstyle')
        stylelib.write_text(txt)

    self.style = filename
    return self.style
def wrapper(dataset, **kwargs):
    # Apply `method` (a name taken from the enclosing scope, not visible
    # here) to the data of `dataset` along one dimension and resize the
    # coordinate of that dimension accordingly.
    #
    # The keyword `inplace` (default False) selects whether `dataset` itself
    # or a copy is modified; the remaining keyword arguments are forwarded
    # to `dataset.get_axis` and to `method`.
    # Returns the processed dataset. Raises TypeError when the coordinate is
    # still not linear after conversion to LinearCoord.

    # On which axis do we want to shift (get axis from arguments)
    axis, dim = dataset.get_axis(**kwargs, negative_axis=True)

    # output dataset inplace or not (a copy is made unless `inplace` is True)
    if not kwargs.pop("inplace", False):
        new = dataset.copy()  # copy to be sure not to modify this dataset
    else:
        new = dataset

    # the processing is always applied on the last dimension
    swapped = False
    if axis != -1:
        new.swapdims(axis, -1, inplace=True)  # must be done in place
        swapped = True

    x = new.coordset[dim]
    if hasattr(x, "_use_time_axis"):
        x._use_time_axis = True  # we need to have dimensionless or time units

    # get the last coord
    if x.unitless or x.dimensionless or x.units.dimensionality == "[time]":
        if not x.linear:
            # This method apply only to linear coordinates.
            # we try to linearize it
            x = LinearCoord(x)

        if not x.linear:
            raise TypeError("Coordinate x is not linearisable")

        data = method(new.data, **kwargs)
        new._data = data

        # we need to increase the x coordinates array
        # NOTE(review): when `x` was rebuilt as a LinearCoord above, it is not
        # visible here that the resized object is stored back in `new.coordset`
        # - confirm.
        x._size = new._data.shape[-1]

        # update with the new td
        new.meta.td[-1] = x.size
        new.history = f"`{method.__name__}` shift performed on dimension `{dim}` with parameters: {kwargs}"

    else:
        error_(
            "zero-filling apply only to dimensions with [time] dimensionality or dimensionless coords\n"
            "The processing was thus cancelled"
        )

    # restore original data order if it was swapped
    if swapped:
        new.swapdims(axis, -1, inplace=True)  # must be done inplace

    return new
def test_ndmath_unary_ufuncs_simple_data(nd2d, name, comment):
    """
    Smoke test of the numpy unary ufunc `name` on NDDataset objects.

    The ufunc is applied to a unitless dataset, to a dataset with units and
    to a dataset with units and a masked element. A DimensionalityError
    raised for the last two cases is reported, not propagated.
    """
    ufunc = getattr(np, name)

    # simple unitless NDDataset
    # --------------------------
    scaled = nd2d.copy() / 1.0e10  # divide to avoid some overflow in exp ufuncs
    assert scaled.unitless
    ufunc(scaled)

    # NDDataset with units
    # ---------------------
    scaled.units = ur.absorbance

    # TODO: some ufunc suppress the units! see pint.
    try:
        ufunc(scaled)

        # with units and mask (start again from the fixture data)
        masked = nd2d.copy()
        masked.units = ur.absorbance
        masked[1, 1] = MASKED
        ufunc(masked)
    except DimensionalityError as e:
        error_(f"{name}: ", e)
def _plot_generic(self, **kwargs):
    """
    Plot with the 1D, 2D or 3D plotter selected from ``_squeeze_ndim``.

    Returns what the selected plotter returns, or False (after reporting an
    error) when ``_squeeze_ndim`` is not 1, 2 or 3.
    """
    ndim = self._squeeze_ndim

    if ndim == 1:
        return plot_1D(self, **kwargs)
    if ndim == 2:
        return plot_2D(self, **kwargs)
    if ndim == 3:
        return plot_3D(self, **kwargs)

    error_("Cannot guess an adequate plotter, nothing done!")
    return False
def plot_generic(self, **kwargs):
    """
    The generic plotter.

    It selects a basic plot from the number of dimensions of the data
    (``_squeeze_ndim``): `plot_1D`, `plot_2D` or `plot_3D`.

    Parameters
    ----------
    **kwargs
        Optional arguments passed unchanged to the selected plotter.

    Returns
    -------
    ax
        What the selected plotter returns, or False (after reporting an
        error) when ``_squeeze_ndim`` is not 1, 2 or 3.
    """
    ndim = self._squeeze_ndim

    if ndim == 1:
        return plot_1D(self, **kwargs)
    if ndim == 2:
        return plot_2D(self, **kwargs)
    if ndim == 3:
        return plot_3D(self, **kwargs)

    error_('Cannot guess an adequate plotter, nothing done!')
    return False
def _cantera_is_not_available():
    """
    Tell whether the module-level name `ct` is None.

    An error asking to install cantera is reported when it is.
    """
    missing = ct is None
    if missing:
        error_(
            "Missing optional dependency 'cantera'. Use conda or pip to install cantera."
        )
    return missing
def download_iris():
    """
    Download the classical `IRIS` dataset.

    The `IRIS` dataset is a classical example for machine learning. It is
    downloaded from the
    [UCI distant repository](https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data).
    When the repository cannot be reached (or answers with an HTTP error),
    the copy shipped with scikit-learn is used, if scikit-learn is installed.

    Returns
    -------
    dataset
        The `IRIS` dataset.

    Raises
    ------
    OSError
        If the downloaded data cannot be parsed, or if the download failed
        and scikit-learn is not available.

    See Also
    --------
    read : Read data from experimental data.
    """
    url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"

    try:
        response = requests.get(url, timeout=10)
        # an HTTP error page must not be parsed as data
        response.raise_for_status()
    except OSError:
        # the exceptions raised by `requests` derive from OSError
        error_(
            "OSError: Cannot connect to the UCI repository. Try Scikit-Learn")
        response = None

    if response is not None:
        # decode the whole payload at once
        fil = StringIO(response.content.decode("utf8"))
        try:
            data = np.loadtxt(fil, delimiter=",", usecols=range(4))
            fil.seek(0)
            labels = np.loadtxt(fil,
                                delimiter=",",
                                usecols=(4, ),
                                dtype="|S")
            labels = [lab.decode("utf8") for lab in labels]
        except Exception as exc:
            raise OSError("can't read the downloaded IRIS data") from exc

        return _make_iris_dataset(
            data, labels, "Loaded from UC Irvine machine learning repository")

    # Cannot download - use the scikit-learn dataset (if scikit-learn is installed)
    sklearn = import_optional_dependency("sklearn", errors="ignore")
    if sklearn is None:
        raise OSError("Failed in downloading the `IRIS` dataset!")

    from sklearn import datasets

    data = datasets.load_iris()
    labels = [data.target_names[i] for i in data.target]
    return _make_iris_dataset(data.data, labels,
                              "Loaded from scikit-learn datasets")


def _make_iris_dataset(data, labels, origin):
    # Build the IRIS NDDataset from the measurements, the sample labels and
    # the text to record in the history.
    coordx = Coord(
        labels=[
            "sepal_length", "sepal width", "petal_length", "petal_width"
        ],
        title="features",
    )
    coordy = Coord(labels=labels, title="samples")

    new = NDDataset(
        data,
        coordset=[coordy, coordx],
        title="size",
        name="`IRIS` Dataset",
        units="cm",
    )
    new.history = origin
    return new
def makestyle(self, stylename="mydefault", to_mpl=False):
    """
    Create Matplotlib Style files.

    The current matplotlib ``rcParams`` are written to
    ``<stylesheets>/<stylename>.mplstyle``, followed by a commented section
    holding parameters that are not matplotlib parameters.

    Parameters
    ----------
    stylename : str, optional, default: "mydefault"
        Name of the style (the ``.mplstyle`` suffix is set automatically).
        Names starting with ``scpy`` are refused.
    to_mpl : bool, optional, default: False
        If True, the style sheet is also written in the ``stylelib``
        directory of the matplotlib configuration directory.

    Returns
    -------
    stylename
        Name of the style, or None when the name is refused.
    """
    if stylename.startswith("scpy"):
        error_(
            "Style name starting with `scpy` are READ-ONLY. Please use an another style name."
        )
        return

    # rcParams that are not written in the style sheet
    skipped = {
        "animation.avconv_args",
        "animation.avconv_path",
        "animation.html_args",
        "keymap.all_axes",
        "mathtext.fallback_to_cm",
        "validate_bool_maybe_none",
        "savefig.jpeg_quality",
        "text.latex.preview",
        "backend",
        "backend_fallback",
        "date.epoch",
        "docstring.hardcopy",
        "figure.max_open_warning",
        "figure.raise_window",
        "interactive",
        "savefig.directory",
        "timezone",
        "tk.window_focus",
        "toolbar",
        "webagg.address",
        "webagg.open_in_browser",
        "webagg.port",
        "webagg.port_retries",
    }

    chunks = []
    sline = ""
    for key in mpl.rcParams.keys():
        if key in skipped:
            continue

        val = str(mpl.rcParams[key])
        if val.startswith(("CapStyle", "JoinStyle")):
            # keep only the part after the last dot
            val = val.split(".")[-1]

        # collapse runs of spaces into a single space
        while "  " in val:
            val = val.replace("  ", " ")

        line = f"{key:40s} : {val}\n"
        if line[0] != sline:
            # blank line between groups of keys sharing the same first letter
            chunks.append("\n")
            sline = line[0]
        if key not in ["axes.prop_cycle"]:
            line = (line.replace("[", "").replace("]", "").replace(
                "'", "").replace('"', ""))
        if key == "savefig.bbox":
            line = f"{key:40s} : standard\n"
        chunks.append(line.replace("#", ""))

    # Non matplotlib parameters,
    # some parameters are not saved in matplotlib style sheets so we will add them here
    nonmplpars = [
        "method_1D",
        "method_2D",
        "method_3D",
        "colorbar",
        "show_projections",
        "show_projection_x",
        "show_projection_y",
        "colormap",
        "max_lines_in_stack",
        "simplify",
        "number_of_x_labels",
        "number_of_y_labels",
        "number_of_z_labels",
        "number_of_contours",
        "contour_alpha",
        "contour_start",
        "antialiased",
        "rcount",
        "ccount",
    ]
    chunks.append("\n\n##\n## ADDITIONAL PARAMETERS FOR SPECTROCHEMPY\n##\n")
    chunks.extend(f"##@{par:37s} : {getattr(self, par)}\n"
                  for par in nonmplpars)
    txt = "".join(chunks)

    stylesheet = (pathclean(self.stylesheets) /
                  stylename).with_suffix(".mplstyle")
    stylesheet.write_text(txt)

    if to_mpl:
        # make it also accessible to pyplot
        stylelib = (pathclean(mpl.get_configdir()) / "stylelib" /
                    stylename).with_suffix(".mplstyle")
        # the text to write is a required argument of write_text
        stylelib.write_text(txt)

    return stylename
def plot(self, method=None, **kwargs):
    """
    Generic plot function.

    This apply to a |NDDataset| but actually delegate the work to a plotter
    defined by the keyword parameter ``method``.

    Parameters
    ----------
    method : str, optional, default: "generic"
        Specify which plot method to use: the attribute ``plot_<method>`` of
        this object is called, any ``+`` in the name being replaced by ``_``.
        When not given, the generic plotter is used.
    **kwargs
        Any optional parameters to pass to the plot method.
        See plot_1D, plot_2D and plot_3D for a list of possible arguments.

    Returns
    -------
    axe
        The axe instance on which the plot has been performed.

    Raises
    ------
    IOError
        If no ``plot_<method>`` attribute exists.

    See Also
    --------
    plot_1D
    plot_pen
    plot_bar
    plot_scatter_pen
    plot_multiple
    plot_2D
    plot_stack
    plot_map
    plot_image
    plot_3D
    plot_surface
    plot_waterfall
    multiplot

    Examples
    --------
    For 1D data, the default plot is done with method scatter

    >>> nd = scp.NDDataset([1, 2, 3])
    >>> _ = nd.plot()  # default to method="scatter"

    or

    >>> _ = nd.plot(method="scatter")

    Equivalently, one can also specify the method to use as follow:

    >>> _ = nd.plot_scatter()
    >>> _ = nd.plot_1D()
    """
    # no method specified: the plotter is selected from the dimension of the data
    if not method:
        return self._plot_generic(**kwargs)

    _plotter = getattr(self, f"plot_{method.replace('+', '_')}", None)
    if _plotter is None:
        # no plotter found
        message = "The specified plotter for method `{}` was not found!".format(method)
        error_(message)
        raise IOError(message)

    # Execute the plotter
    return _plotter(**kwargs)
def fft(dataset, size=None, sizeff=None, inv=False, ppm=True, **kwargs):
    """
    Apply a complex fast fourier transform.

    For multidimensional NDDataset, the transform is by default performed on
    the last dimension.

    The data in the last dimension MUST be in time-domain (or without
    dimension), otherwise an error is reported and the data are returned
    untransformed.

    To make reverse Fourier transform, i.e., from frequency to time domain,
    use the `ifft` transform (or equivalently, the `inv=True` parameters).

    Parameters
    ----------
    dataset : |NDDataset|
        The dataset on which to apply the fft transformation.
    size : int, optional
        Size of the transformed dataset dimension - a shorter parameter is
        `si`. By default, the size of the data is used. For data of
        `topspin` origin the size is then passed through
        `largest_power_of_2`.
    sizeff : int, optional
        The number of effective data point to take into account for the
        transformation. By default it is equal to the data size, but may be
        smaller.
    inv : bool, optional, default=False
        If True, an inverse Fourier transform is performed - size parameter
        is not taken into account.
    ppm : bool, optional, default=True
        If True, and data are from NMR, then a ppm scale is calculated
        instead of frequency.
    **kwargs
        Optional keyword parameters (see Other Parameters).

    Returns
    -------
    out
        Transformed |NDDataset|.

    Other Parameters
    ----------------
    dim : str or int, optional, default='x'.
        Specify on which dimension to apply this method. If `dim` is
        specified as an integer it is equivalent to the usual `axis` numpy
        parameter.
    inplace : bool, optional, default=False.
        True if we make the transform inplace. If False, the function return
        a new object.
    tdeff : int, optional
        Alias of sizeff (specific to NMR). If both sizeff and tdeff are
        passed, sizeff has the priority.

    See Also
    --------
    ifft : Inverse Fourier transform.
    """
    # datatype
    is_nmr = dataset.origin.lower() in [
        "topspin",
    ]
    is_ir = dataset.meta.interferogram

    # On which axis do we want to apply transform (get axis from arguments)
    dim = kwargs.pop("dim", kwargs.pop("axis", -1))
    axis, dim = dataset.get_axis(dim, negative_axis=True)

    # output dataset inplace or not
    inplace = kwargs.pop("inplace", False)
    if not inplace:  # default
        new = dataset.copy()  # copy to be sure not to modify this dataset
    else:
        new = dataset

    # The last dimension is always the dimension on which we apply the fourier transform.
    # If needed, we swap the dimensions to be sure to be in this situation
    swapped = False
    if axis != -1:
        new.swapdims(axis, -1, inplace=True)  # must be done in place
        swapped = True

    # Select the last coordinates
    x = new.coordset[dim]

    # Performs some dimensionality checking
    error = False
    if (not inv and not x.unitless and not x.dimensionless
            and x.units.dimensionality != "[time]"):
        error_(
            "fft apply only to dimensions with [time] dimensionality or dimensionless coords\n"
            "fft processing was thus cancelled")
        error = True

    elif (inv and not x.unitless and x.units.dimensionality != "1/[time]"
          and not x.dimensionless):
        error_(
            "ifft apply only to dimensions with [frequency] dimensionality or with ppm units "
            "or dimensionless coords.\n ifft processing was thus cancelled")
        error = True

    # Should not be masked
    elif new.is_masked:
        error_(
            "current fft or ifft processing does not support masked data as input.\n processing was thus cancelled"
        )
        error = True

    # Coordinates should be uniformly spaced (linear coordinate)
    if not x.linear:
        # try to linearize it
        x.linear = True
        if not x.linear:
            # linearization failed (no message is reported for this case)
            error = True

    if hasattr(x, "_use_time_axis"):
        x._use_time_axis = True  # we need to have dimensionless or time units

    if not error:
        # OK we can proceed

        # time domain size
        td = None
        if not inv:
            td = x.size

        # if no size (or si) parameter then use the size of the data (size not used for inverse transform)
        if size is None or inv:
            size = kwargs.get("si", x.size)

        # for data of topspin origin the size goes through largest_power_of_2
        # (presumably the closest power of two larger than the data size - confirm)
        if is_nmr:
            size = largest_power_of_2(size)

        # do we have an effective td to apply
        tdeff = sizeff
        if tdeff is None:
            tdeff = kwargs.get("tdeff", td)

        # effective sizes that are missing, below 5 or above `size` are ignored
        if tdeff is None or tdeff < 5 or tdeff > size:
            tdeff = size

        # Eventually apply the effective size (points beyond tdeff are zeroed)
        new[..., tdeff:] = 0.0

        # Should we work on complex or hypercomplex data
        # interleaved is in case of >2D data  ( # TODO: >D not yet implemented in ndcomplex.py
        iscomplex = False
        if axis == -1:
            iscomplex = new.is_complex
        if new.is_quaternion or new.is_interleaved:
            iscomplex = True

        # If we are in NMR we have an additional complication due to the mode
        # of acquisition (sequential mode when ['QSEQ','TPPI','STATES-TPPI'])
        encoding = "undefined"
        if not inv and "encoding" in new.meta:
            encoding = new.meta.encoding[-1]

        qsim = encoding in ["QSIM", "DQD"]
        qseq = "QSEQ" in encoding
        states = "STATES" in encoding
        echoanti = "ECHO-ANTIECHO" in encoding
        tppi = "TPPI" in encoding
        qf = "QF" in encoding

        zf_size(new, size=size, inplace=True)

        # Perform the fft (the first matching branch wins, so the order of
        # the tests matters: e.g. "STATES-TPPI" is handled by `states`)
        if qsim:  # F2 fourier transform
            data = _fft(new.data)

        elif qseq:
            raise NotImplementedError("QSEQ not yet implemented")

        elif states:
            data = _states_fft(new.data, tppi)

        elif tppi:
            data = _tppi_fft(new.data)

        elif echoanti:
            data = _echoanti_fft(new.data)

        elif qf:
            # we must perform a real fourier transform of a time domain dataset
            data = _qf_fft(new.data)

        elif iscomplex and inv:
            # We assume no special encoding for inverse complex fft transform
            data = _ifft(new.data)

        elif not iscomplex and not inv and is_ir:
            # transform interferogram
            data = _interferogram_fft(new.data)

        elif not iscomplex and inv:
            raise NotImplementedError("Inverse FFT for real dimension")

        else:
            raise NotImplementedError(
                f"{encoding} not yet implemented. We recommend you to put an issue on "
                f"Github, so we will not forget to work on this!.")

        # We need here to create a new dataset with new shape and axis
        new._data = data
        new.mask = False

        # create new coordinates for the transformed data
        if is_nmr:
            sfo1 = new.meta.sfo1[-1]
            bf1 = new.meta.bf1[-1]
            sf = new.meta.sf[-1]
            sw = new.meta.sw_h[-1]
            if new.meta.nuc1 is not None:
                # split e.g. a leading non-letter part (mass) from the letters
                # (name) to build a LaTeX label for the nucleus
                nuc1 = new.meta.nuc1[-1]
                regex = r"([^a-zA-Z]+)([a-zA-Z]+)"
                m = re.match(regex, nuc1)
                if m is not None:
                    mass = m[1]
                    name = m[2]
                    nucleus = "^{" + mass + "}" + name
                else:
                    nucleus = ""
            else:
                nucleus = ""
        else:
            sfo1 = 0 * ur.Hz
            bf1 = sfo1
            dw = x.spacing
            # NOTE(review): this branch only prints an empty line; it looks
            # like a debugging leftover - confirm before removing.
            if isinstance(dw, list):
                print()
            sw = 1 / 2 / dw
            sf = -sw / 2
            size = size // 2

        if not inv:
            # time to frequency
            sizem = max(size - 1, 1)
            deltaf = -sw / sizem
            first = sfo1 - sf - deltaf * sizem / 2.0

            # newcoord = type(x)(np.arange(size) * deltaf + first)
            newcoord = LinearCoord.arange(size) * deltaf + first
            newcoord.show_datapoints = False
            newcoord.name = x.name
            new.title = "intensity"
            if is_nmr:
                newcoord.title = f"${nucleus}$ frequency"
                newcoord.ito("Hz")
            elif is_ir:
                new._units = None
                newcoord.title = "wavenumbers"
                newcoord.ito("cm^-1")
            else:
                newcoord.title = "frequency"
                newcoord.ito("Hz")

        else:
            # frequency or ppm to time
            sw = abs(x.data[-1] - x.data[0])
            if x.units == "ppm":
                sw = bf1.to("Hz") * sw / 1.0e6
            deltat = (1.0 / sw).to("us")

            newcoord = LinearCoord.arange(size) * deltat
            newcoord.name = x.name
            newcoord.title = "time"
            newcoord.ito("us")

        if is_nmr and not inv:
            newcoord.meta.larmor = bf1  # needed for ppm transformation
            # NOTE(review): `ppm` is a named parameter of this function, so
            # "ppm" can never be a key of kwargs: this line always sets ppm to
            # True and a caller passing ppm=False is ignored - confirm and fix.
            ppm = kwargs.get("ppm", True)
            if ppm:
                newcoord.ito("ppm")
                newcoord.title = rf"$\delta\ {nucleus}$"

        new.coordset[dim] = newcoord

        # update history
        s = "ifft" if inv else "fft"
        new.history = f"{s} applied on dimension {dim}"

        # PHASE ?
        iscomplex = new.is_complex or new.is_quaternion
        if iscomplex and not inv:
            # phase frequency domain

            # if some phase related metadata do not exist yet, initialize them
            new.meta.readonly = False

            if not new.meta.phased:
                new.meta.phased = [False] * new.ndim

            if not new.meta.phc0:
                new.meta.phc0 = [0] * new.ndim

            if not new.meta.phc1:
                new.meta.phc1 = [0] * new.ndim

            if not new.meta.exptc:
                new.meta.exptc = [0] * new.ndim

            if not new.meta.pivot:
                new.meta.pivot = [0] * new.ndim

            # applied the stored phases
            new.pk(inplace=True)

            new.meta.pivot[-1] = abs(new).coordmax(dim=dim)
            new.meta.readonly = True

    # restore original data order if it was swapped
    if swapped:
        new.swapdims(axis, -1, inplace=True)  # must be done inplace

    return new
def download_nist_ir(CAS, index="all"):
    """
    Download IR spectra from the NIST webbook.

    Parameters
    ----------
    CAS : int or str
        The CAS number, can be given as "XXXX-XX-X" (str), "XXXXXXX" (str),
        XXXXXXX (int).
    index : str or int or iterable of ints
        If set to 'all' (default), import all available spectra for the
        compound; otherwise a single spectrum index or the indexes of the
        selected spectra.

    Returns
    -------
    list of NDDataset or NDDataset
        The dataset(s). None is returned when NIST cannot be reached or when
        no spectrum is found.

    Raises
    ------
    ValueError
        If `index` is not 'all', an int or an iterable.
    OSError
        If a downloaded JCAMP file cannot be read.

    See Also
    --------
    read : Read data from experimental data.
    """
    if isinstance(CAS, str) and "-" in CAS:
        CAS = CAS.replace("-", "")

    if index == "all":
        # probe successive indexes until NIST answers "Spectrum not found"
        index = []
        i = 0
        while True:
            url = f"https://webbook.nist.gov/cgi/cbook.cgi?JCAMP=C{CAS}&Index={i}&Type=IR"
            try:
                response = requests.get(url, timeout=10)
            except OSError:
                error_("OSError: could not connect to NIST")
                return None
            if b"Spectrum not found" in response.content[:30]:
                break
            index.append(i)
            i += 1

        if len(index) == 0:
            error_("NIST IR: no spectrum found")
            return
        elif len(index) == 1:
            info_("NIST IR: 1 spectrum found")
        else:
            info_(f"NIST IR: {len(index)} spectra found")

    elif isinstance(index, int):
        index = [index]
    elif not is_iterable(index):
        raise ValueError("index must be 'all', int or iterable of int")
    else:
        # make sure that `index[-1]` below works for any iterable
        index = list(index)

    out = []
    tmp = Path(".") / "temp.jdx"
    for i in index:
        # sample address (water, spectrum 1)
        # https://webbook.nist.gov/cgi/cbook.cgi?JCAMP=C7732185&Index=1&Type=IR
        url = f"https://webbook.nist.gov/cgi/cbook.cgi?JCAMP=C{CAS}&Index={i}&Type=IR"
        try:
            response = requests.get(url, stream=True, timeout=10)
        except OSError:
            error_("OSError: Cannot connect... ")
            return None

        if b"Spectrum not found" in response.content[:30]:
            error_(
                f"NIST IR: Spectrum {i} does not exist... please check !")
            if i == index[-1] and out == []:
                return None
            # NOTE(review): the remaining indexes are not processed after a
            # missing spectrum - confirm this is intended (vs. `continue`).
            break

        # Load data: decode the whole payload at once (decoding it chunk by
        # chunk could split a multi-byte character)
        txtdata = response.content.decode("utf8")

        try:
            with open(tmp, "w") as f:
                f.write(txtdata)
            try:
                ds = read_jcamp("temp.jdx")

                # replace the default entry ":imported from jdx file":
                ds.history[0] = ds.history[0][:len(str(datetime.now(
                    timezone.utc)))] + (f" : downloaded from NIST: {url}\n")
                out.append(ds)
            except Exception as exc:
                raise OSError(
                    "Can't read this JCAMP file: please report the issue to Spectrochempy developpers"
                ) from exc
        finally:
            # never leave the temporary file behind, even on failure
            if tmp.exists():
                tmp.unlink()

    if len(out) == 1:
        return out[0]
    else:
        return out
def smooth(dataset, window_length=5, window='flat', **kwargs):
    """
    Smooth the data using a window with requested size.

    This method is based on the convolution of a scaled window with the signal.
    The signal is prepared by introducing reflected copies of the signal
    (with the window size) in both ends so that transient parts are minimized
    in the beginning and end part of the output data.

    Parameters
    ----------
    dataset : |NDDataset| or a ndarray-like object
        Input object.
    window_length : int, optional, default=5
        The dimension of the smoothing window; must be an odd integer.
        Nothing is done for a length below 3.
    window : str or callable, optional, default='flat'
        The type of window from 'flat', 'hanning', 'hamming', 'bartlett',
        'blackman', or a callable returning the window for a given length.
        flat window will produce a moving average smoothing.
    **kwargs : dict
        See other parameters.

    Returns
    -------
    smoothed
        Same type as input dataset. When a parameter is not valid, an error
        is reported and the data are returned unsmoothed.

    Other Parameters
    ----------------
    dim : str or int, optional, default='x'.
        Specify on which dimension to apply this method. If `dim` is specified as an integer it is equivalent
        to the usual `axis` numpy parameter.
    inplace : bool, optional, default=False.
        True if we make the transform inplace.  If False, the function return a new object

    See Also
    --------
    savgol_filter : Apply a Savitzky-Golay filter.

    Examples
    --------
    >>> import spectrochempy as scp
    >>> ds = scp.read("irdata/nh4y-activation.spg")
    >>> ds.smooth(window_length=11)
    NDDataset: [float64] a.u. (shape: (y:55, x:5549))
    """
    if not kwargs.pop('inplace', False):
        # default
        new = dataset.copy()
    else:
        new = dataset

    axis = kwargs.pop('dim', kwargs.pop('axis', -1))
    is_ndarray = not hasattr(new, 'get_axis')
    if not is_ndarray:
        axis, _ = new.get_axis(axis, negative_axis=True)

    # All the checks are done before any dimension is swapped, so that an
    # early return never leaves the data in a swapped state.
    if (window_length % 2) != 1:
        error_("Window length must be an odd integer.")
        return new

    if new.shape[axis] < window_length:
        error_("Input vector needs to be bigger than window size.")
        return new

    if window_length < 3:
        return new

    wind = {
        'flat': np.ones,
        'hanning': np.hanning,
        'hamming': np.hamming,
        'bartlett': np.bartlett,
        'blackman': np.blackman,
    }

    if not callable(window):
        if window not in wind:
            error_(
                "Window must be a callable or a string among 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'"
            )
            return new
        window = wind[window]

    # The smoothing is always applied on the last dimension
    swapped = axis != -1
    if is_ndarray:
        # `ndarray.data` is a raw buffer, not the values: work on the array
        values = np.asarray(new)
        if swapped:
            values = np.swapaxes(values, axis, -1)
    else:
        if swapped:
            new.swapdims(axis, -1, inplace=True)  # must be done in place
        values = new.data

    # extend on both side to limit side effects
    extended = np.r_['-1', values[..., window_length - 1:0:-1], values,
                     values[..., -1:-window_length:-1]]

    w = window(window_length)
    data = np.apply_along_axis(np.convolve, -1, extended, w / w.sum(), mode='valid')

    # drop the extra points introduced by the reflected copies
    half = window_length // 2
    data = data[..., half:-half]

    if is_ndarray:
        if swapped:
            data = np.swapaxes(data, axis, -1)
        return data

    new.data = data
    new.history = f'smoothing with a window:{window.__name__} of length {window_length}'

    # restore original data order if it was swapped
    if swapped:
        new.swapdims(axis, -1, inplace=True)  # must be done inplace

    return new
def wrapper(dataset, **kwargs):
    # Apply the phasing function `method` (a name taken from the enclosing
    # scope, not visible here) along one dimension of `dataset` and update
    # the phase related metadata (phc0, phc1, exptc, pivot, phased).
    #
    # Keywords read here: `inplace` (default False), `rel` (default False,
    # relative phase), `phc0`, `phc1`, `pivot`, `exptc`; the resulting
    # unitless values are forwarded to `method`.
    # Returns the processed dataset.

    # On which axis do we want to phase (get axis from arguments)
    axis, dim = dataset.get_axis(**kwargs, negative_axis=True)

    # output dataset inplace or not (a copy is made unless `inplace` is True)
    if not kwargs.pop('inplace', False):
        new = dataset.copy()  # copy to be sure not to modify this dataset
    else:
        new = dataset

    # the processing is always applied on the last dimension
    swaped = False
    if axis != -1:
        new.swapdims(axis, -1, inplace=True)  # must be done in place
        swaped = True

    # Get the coordinates for the last dimension
    x = new.coordset[dim]

    # check if the dimensionality is compatible with this kind of functions
    # (anything but a [time] dimensionality is accepted)
    if x.unitless or x.dimensionless or x.units.dimensionality != '[time]':

        # extract initial phase from metadata
        def _check_units(par, default_units, inv=False):
            # Return `par` as a quantity expressed in `default_units`; with
            # `inv`, `par` is converted through its inverse (a zero value is
            # returned unchanged).
            if not isinstance(par, Quantity):
                par *= Quantity(1., default_units)
            elif inv:
                if par == 0:
                    return par
                par = 1. / (1. / par).to(default_units)
            else:
                par = par.to(default_units)
            return par

        # Set correct units for the parameters
        dunits = dataset.coordset[dim].units

        current = [new.meta.phc0[-1], new.meta.phc1[-1]]
        rel = kwargs.pop('rel', False)
        if rel:  # relative phase
            current = [0, 0]

        kwargs['phc0'] = (_check_units(kwargs.get('phc0', 0), 'degree') - current[0]).magnitude
        kwargs['phc1'] = (_check_units(kwargs.get('phc1', 0), 'degree') - current[1]).magnitude
        kwargs['pivot'] = _check_units(kwargs.get('pivot', new.meta.pivot[-1]), dunits).magnitude
        kwargs['exptc'] = _check_units(kwargs.get('exptc', new.meta.get('exptc', [0] * new.ndim)[-1]), dunits,
                                       inv=True).magnitude

        if not new.meta.phased[-1]:
            # initial phase from topspin have not yet been used
            kwargs['phc0'] = -kwargs['phc0']
            kwargs['phc1'] = -kwargs['phc1']

        apod = method(new.data, **kwargs)
        new *= apod

        new.history = f'`{method.__name__}` applied to dimension `{dim}` with parameters: {kwargs}'

        # NOTE(review): the layout of the source was lost; the nesting of the
        # `exptc` and `pivot` updates below is a reconstruction - confirm
        # against the repository.
        if not new.meta.phased[-1]:
            new.meta.phased[-1] = True
            new.meta.phc0[-1] = 0 * ur.degree
            new.meta.phc1[-1] = 0 * ur.degree
            new.meta.exptc[-1] = 0 * (1 / dunits)
        else:
            if rel:
                new.meta.phc0[-1] += kwargs['phc0'] * ur.degree
                new.meta.phc1[-1] += kwargs['phc1'] * ur.degree
            else:
                new.meta.phc0[-1] = kwargs['phc0'] * ur.degree
                new.meta.phc1[-1] = kwargs['phc1'] * ur.degree
            # TODO: to do for exptc too!
            new.meta.exptc[-1] = kwargs['exptc'] * (1 / dunits)

        new.meta.pivot[-1] = kwargs['pivot'] * dunits

    else:  # not (x.unitless or x.dimensionless or x.units.dimensionality != '[time]')
        error_('This method apply only to dimensions with [frequency] or [dimensionless] dimensionality.\n'
               'Phase processing was thus cancelled')

    # restore original data order if it was swapped
    if swaped:
        new.swapdims(axis, -1, inplace=True)  # must be done inplace

    return new
def wrapper(dataset, **kwargs):
    """
    Apply the wrapped apodization `method` along one dimension of `dataset`.

    `method` and `units` are not defined here: they are read from the
    enclosing scope (presumably the decorated function and the mapping
    ``parameter name -> default units`` given to the decorator -- confirm
    against the caller).

    Parameters
    ----------
    dataset : object
        Dataset to apodize. It must provide `origin`, `get_axis`, `copy`,
        `swapdims`, `coordset`, `data` and `history`, as used below.
    **kwargs
        First forwarded to `dataset.get_axis` to select the dimension.
        The following keys are handled here:

        * ``retapod`` (default False) : also return the apodization curve.
        * ``dryrun`` (default False) : compute the curve but neither modify
          the data nor the history (no copy is made in this case).
        * ``inplace`` (default False) : operate on `dataset` itself.
        * ``rev`` (default False) : reverse the apodization curve.
        * ``inv`` (default False) : invert the apodization curve.
        * every key listed in `units` : converted to the units of the
          coordinate and replaced by its magnitude before `method` is called.

    Returns
    -------
    object or tuple
        The processed dataset, or ``(dataset, apodization_curve)`` when
        ``retapod=True``. When the dimension is neither unitless,
        dimensionless nor a [time] dimension, an error is logged and the
        data are left untouched.
    """

    # what to return
    retapod = kwargs.pop('retapod', False)
    dryrun = kwargs.pop('dryrun', False)
    is_ir = dataset.origin.lower() in ["omnic", "opus"]

    # On which axis do we want to apodize? (get axis from arguments)
    axis, dim = dataset.get_axis(**kwargs, negative_axis=True)

    # output dataset inplace or not (a copy is made unless inplace or dryrun)
    if not kwargs.pop('inplace', False) and not dryrun:
        new = dataset.copy()  # copy to be sure not to modify this dataset
    else:
        new = dataset

    # The last dimension is always the dimension on which we apply the
    # apodization window. If needed, we swap the dimensions to be sure to be
    # in this situation
    swaped = False
    if axis != -1:
        new.swapdims(axis, -1, inplace=True)  # must be done in place
        swaped = True

    # Get the coordinates for the last dimension
    x = new.coordset[dim]

    # Temporarily force the time representation of the coordinate (when it
    # supports it): we need dimensionless or time units below
    has_time_axis_flag = hasattr(x, '_use_time_axis')
    if has_time_axis_flag:
        store = x._use_time_axis
        x._use_time_axis = True

    # check if the dimensionality is compatible with this kind of functions
    if x.unitless or x.dimensionless or x.units.dimensionality == '[time]':

        # Set correct units for parameters
        dunits = dataset.coordset[dim].units
        apod = {}  # parameters with their units, kept for the history
        for key, default_units in units.items():

            if key not in kwargs or default_units is None:
                continue

            par = kwargs[key]

            if isinstance(par, str):
                par = Quantity(par)

            if not isinstance(par, Quantity):
                # set to default units
                par *= Quantity(1., default_units)

            apod[key] = par

            # a parameter given in reciprocal units is converted through
            # its inverse
            if par.dimensionality == 1 / dunits.dimensionality:
                converted = 1. / (1. / par).to(dunits)
            else:
                converted = par.to(dunits)

            # `method` only receives unitless values
            kwargs[key] = converted.magnitude

        # Call to the apodize function
        # ----------------------------
        if is_ir:
            # we must apodize at the top of the interferogram: build a
            # window twice as long as the distance between the maximum and
            # the end, and keep its last `x.size` points
            zpd = int(np.argmax(new.data, -1))
            dist2end = x.size - zpd
            apod_arr = method(np.empty(2 * dist2end), **kwargs)
            apod_arr = apod_arr[-x.size:]
        else:
            apod_arr = method(x.data, **kwargs)

        if kwargs.pop('rev', False):
            apod_arr = apod_arr[::-1]  # reverse apodization

        if kwargs.pop('inv', False):
            apod_arr = 1. / apod_arr  # invert apodization

        # Apply?
        if not dryrun:
            new.history = f'`{method.__name__}` apodization performed on dimension `{dim}` ' \
                          f'with parameters: {apod}'
            new._data *= apod_arr

    else:  # the dimension has units which are not [time]
        error_(
            'This method apply only to dimensions with [time] or [dimensionless] dimensionality.\n'
            'Apodization processing was thus cancelled')
        apod_arr = 1.

    # restore original data order if it was swaped
    if swaped:
        new.swapdims(axis, -1, inplace=True)  # must be done inplace

    # Restore the flag on the very coordinate it was changed on. (Restoring
    # it on `new.x` is wrong whenever the processed dimension is not `x`.)
    if has_time_axis_flag:
        x._use_time_axis = store

    if retapod:
        apodcurve = type(new)(apod_arr, coordset=[x])
        return new, apodcurve

    return new
def __getitem__(self, items, **kwargs):
    """
    Return the part of this object selected by `items`.

    Parameters
    ----------
    items : index, slice, list or tuple
        Selection, translated into array indexes by `self._make_index`.
        A list is treated as fancy indexing. If `items` is a tuple whose
        last element equals `INPLACE`, this object itself is sliced instead
        of a copy.
    **kwargs
        Accepted for signature compatibility; not used here.

    Returns
    -------
    object or None
        The sliced object (a copy unless in-place slicing was requested),
        or None, after an error message, when the selection is empty.
    """
    if isinstance(items, list):
        # Special case of fancy indexing
        items = (items,)

    # choose, if we keep the same or create new object
    inplace = False
    if isinstance(items, tuple) and items[-1] == INPLACE:
        items = items[:-1]
        inplace = True

    # Eventually get a better representation of the indexes
    keys = self._make_index(items)

    # init returned object
    new = self if inplace else self.copy()

    # slicing by index of all internal array
    if new.data is not None:
        udata = new.data[keys]

        if new.linear:
            # A linear object stores only offset/increment/size: keep that
            # representation only if the selected values are still evenly
            # spaced
            new._size = udata.size
            if new._size > 1:
                inc = np.diff(udata)
                # np.ptp() instead of the ndarray.ptp() method, which is no
                # longer available in NumPy 2
                variation = (inc.max() - inc.min()) / np.ptp(udata)
                if variation < 1.0e-5:
                    new._increment = np.mean(inc)
                    new._offset = udata[0]
                    new._data = None
                    new._linear = True
                else:
                    new._linear = False
            else:
                new._linear = False

        if not new.linear:
            new._data = np.asarray(udata)

    if self.is_labeled:
        # case only of 1D dataset such as Coord
        new._labels = np.array(self._labels[keys])

    if new.is_empty:
        error_(
            f"Empty array of shape {new._data.shape} resulted from slicing.\n"
            f"Check the indexes and make sure to use floats for location slicing"
        )
        # Return right away: going on would set attributes on None and
        # raise an AttributeError
        return None

    new._mask = NOMASK

    # we need to keep the names when copying coordinates to avoid later
    # problems
    new.name = self.name
    return new
def align(dataset, *others, **kwargs):
    """
    Align individual |NDDataset| along given dimensions using various methods.

    Parameters
    -----------
    dataset : |NDDataset|
        Dataset on which we want to align other objects.
    *others : |NDDataset|
        Objects to align.
    dim : str. Optional, default='x'
        Along which axis to perform the alignment.
    dims : list of str, optional, default=None
        Align along all dims defined in dims (if dim is also
        defined, then dims have higher priority).
    method : enum ['outer', 'inner', 'first', 'last', 'interpolate'], optional, default='outer'
        Which method to use for the alignment.

        If align is defined :

        * 'outer' means that a union of the different coordinates is
          achieved (missing values are masked)
        * 'inner' means that the intersection of the coordinates is used
        * 'first' means that the first dataset is used as reference
        * 'last' means that the last dataset is used as reference
        * 'interpolate' means that interpolation is performed relative to
          the first dataset (not yet implemented: currently equivalent to
          'outer', a warning is issued).
    interpolate_method : enum ['linear','pchip']. Optional, default='linear'
        Method of interpolation to perform for the alignment.
    interpolate_sampling : 'auto', int or float. Optional, default='auto'

        * 'auto' : sampling is determined automatically from the existing data.
        * int : if an integer value is specified, then the
          sampling interval for the interpolated data will be split in
          this number of points.
        * float : If a float value is provided, it determines the interval
          between the interpolated data.
    coord : |Coord|, optional, default=None
        Coordinates to use for alignment. Ignore those corresponding to the
        dimensions to align. In this implementation it is only consulted by
        the single-object shortcut.
    copy : bool, optional, default=True
        If False then the returned objects will share memory with the
        original objects, whenever it is possible :
        in principle only if reindexing is not necessary.

    Returns
    --------
    aligned_datasets : tuple of |NDDataset|
        Same objects as datasets with dimensions aligned. None is returned
        (after an error message) if one of the objects is not a |NDDataset|.
        When a single dataset is given without `coord`, the input list is
        returned as is.

    Raises
    ------
    ValueError
        issued when the dimensions given in `dim` or `dims` argument are not
        compatibles (units, titles, etc...).
    NotImplementedError
        issued when `method` is not one of the supported values.
    """
    # DEVELOPER NOTE
    # There is probably better methods, but to simplify dealing with
    # LinearCoord, we transform them in Coord before treatment (going back
    # to linear if possible at the end of the process)

    # TODO: Perform an alignment along numeric labels
    # TODO: add example in docs

    # copy objects?
    copy = kwargs.pop('copy', True)

    # make a single list with dataset and the remaining object
    objects = [dataset] + list(others)

    # should we align on given external coordinates
    extern_coord = kwargs.pop('coord', None)
    if extern_coord and extern_coord.implements('LinearCoord'):
        extern_coord = Coord(extern_coord, linear=False, copy=True)

    # what's the method to use (by default='outer')
    method = kwargs.pop('method', 'outer')

    # trivial cases where alignment is not possible or unnecessary
    if not objects:
        warning_('No object provided for alignment!')
        return None

    if len(objects) == 1 and objects[0].implements(
            'NDDataset') and extern_coord is None:
        # no necessary alignment
        return objects

    # evaluate on which axis we align
    axis, dims = dataset.get_axis(only_first=False, **kwargs)

    # check compatibility of the dims and prepare the dimension for alignment
    # NOTE(review): each pass restarts from copies of the input objects and
    # only the entries of the last pass are written back below -- confirm
    # this is the intended behaviour when several dims are given.
    for _axis, dim in zip(axis, dims):

        # get all objects to align
        _objects = {}
        _nobj = 0

        for idx, candidate in enumerate(objects):

            if not candidate.implements('NDDataset'):
                error_(
                    f'Bad object(s) found: {candidate}. Note that only NDDataset '
                    f'objects are accepted '
                    f'for alignment')
                return None

            _objects[_nobj] = {
                'obj': candidate.copy(),
                'idx': idx,
            }
            _nobj += 1

        _last = _nobj - 1

        # get the reference object (by default the first, except if method
        # is set to 'last')
        ref_obj_index = 0
        if method == 'last':
            ref_obj_index = _last

        ref_obj = _objects[ref_obj_index]['obj']

        # as we will sort their coordinates at some point, we need to know
        # if the coordinates need to be reversed at
        # the end of the alignment process
        is_reversed = ref_obj.coordset[dim].reversed
        if is_reversed:
            ref_obj.sort(descend=False, dim=dim, inplace=True)

        # get the coordset corresponding to the reference object
        ref_obj_coordset = ref_obj.coordset

        # get the coordinate for the reference dimension
        ref_coord = ref_obj_coordset[dim]

        # read the flag again on the (possibly sorted) reference coordinate
        is_reversed = ref_coord.reversed

        # prepare a new Coord object to store the final new dimension
        new_coord = ref_coord.copy()

        # number of decimals used to round coordinates before comparing them
        ndec = get_n_decimals(new_coord.data.max(), 1.e-5)

        if new_coord.implements('LinearCoord'):
            new_coord = Coord(new_coord, linear=False, copy=True)

        # loop on all object to build the common coordinate
        for index, entry in _objects.items():

            obj = entry['obj']

            if obj is ref_obj:
                # not necessary to compare with itself!
                continue

            if is_reversed:
                obj.sort(descend=False, dim=dim, inplace=True)

            # get the current object coordinates and check compatibility
            coord = obj.coordset[dim]
            if coord.implements('LinearCoord') or coord.linear:
                coord = Coord(coord, linear=False, copy=True)

            if not coord.is_units_compatible(ref_coord):
                # not compatible, stop everything
                raise UnitsCompatibilityError(
                    'NDataset to align must have compatible units!')

            # do units transform if necessary so coords can be compared
            if coord.units != ref_coord.units:
                coord.ito(ref_coord)

            # adjust the new_coord depending on the method of alignment
            new_coord_data = set(np.around(new_coord.data, ndec))
            coord_data = set(np.around(coord.data, ndec))

            if method in ['outer', 'interpolate']:
                # in this case we do a union of the coords (masking the
                # missing values)
                # For method=`interpolate`, the interpolation will be
                # performed in a second step
                new_coord._data = sorted(coord_data | new_coord_data)

            elif method == 'inner':
                # take only intersection of the coordinates
                new_coord._data = sorted(coord_data & new_coord_data)

            elif method in ['first', 'last']:
                # we take the reference coordinates already determined as
                # basis (masking the missing values)
                continue

            else:
                raise NotImplementedError(f'The method {method} is unknown!')

        # Now perform alignment of all objects on the new coordinates
        for index, entry in _objects.items():

            obj = entry['obj']

            # get the dim index for the given object
            dim_index = obj.dims.index(dim)

            # prepare slicing keys ; set slice(None) for the untouched
            # dimensions preceding the dimension of interest
            prepend_keys = [slice(None)] * dim_index

            # New objects for obj must be created with the new coordinates

            # change the data shape (np.nan: the `np.NaN` alias is no longer
            # available in NumPy 2)
            new_obj_shape = list(obj.shape)
            new_obj_shape[dim_index] = len(new_coord)
            new_obj_data = np.full(new_obj_shape, np.nan)

            # Keep a handle on the original values *before* the data buffer
            # is replaced: with copy=False `new_obj` is `obj`, and reading
            # `obj.data` afterwards would return the NaN-filled buffer.
            obj_data = obj.data

            # create new dataset for obj and ref_objects
            if copy:
                new_obj = obj.copy()
            else:
                new_obj = obj

            # update the data and mask
            coord = obj.coordset[dim]
            coord_data = set(np.around(coord.data, ndec))

            # positions of the object's coordinates in the new coordinate
            dim_loc = new_coord._loc2index(sorted(coord_data))
            loc = tuple(prepend_keys + [dim_loc])

            new_obj._data = new_obj_data

            # mask all the data then unmask later the relevant data in
            # the next step
            if not new_obj.is_masked:
                new_obj.mask = MASKED
                new_obj.mask[loc] = False
            else:
                mask = new_obj.mask.copy()
                new_obj.mask = MASKED
                new_obj.mask[loc] = mask

            # set the data for the loc
            new_obj._data[loc] = obj_data

            # update the coordinates
            new_coordset = obj.coordset.copy()
            if coord.is_labeled:
                label_shape = list(coord.labels.shape)
                label_shape[0] = new_coord.size
                new_coord._labels = np.zeros(tuple(label_shape)).astype(
                    coord.labels.dtype)
                new_coord._labels[:] = '--'
                new_coord._labels[dim_loc] = coord.labels
            setattr(new_coordset, dim, new_coord)
            new_obj._coordset = new_coordset

            # reversed?
            if is_reversed:
                # we must reverse the given coordinates
                new_obj.sort(descend=is_reversed, dim=dim, inplace=True)

            # update the _objects
            _objects[index]['obj'] = new_obj

            if method == 'interpolate':
                warning_(
                    'Interpolation not yet implemented - for now equivalent '
                    'to `outer`')

    # the new transformed object must be in the same order as the passed
    # objects
    # and the missing values must be masked (for the moment they are defined
    # to NaN)
    for index, entry in _objects.items():
        obj = entry['obj']
        # replace NaN values by a sentinel (to simplify comparisons)
        obj[np.where(np.isnan(obj))] = 99999999999999.
        idx = int(entry['idx'])
        objects[idx] = obj

        # TODO: we also transform into linear coord if possible ?

    # Now return
    return tuple(objects)
def _make_iris_dataset(data, labels, history):
    # Build the IRIS NDDataset from the measurement table `data`, the
    # per-sample species `labels` and the `history` text to record.
    coordx = Coord(labels=[
        'sepal_length', 'sepal width', 'petal_length', 'petal_width'
    ], title='features')
    coordy = Coord(labels=labels, title='samples')

    new = NDDataset(data,
                    coordset=[coordy, coordx],
                    title='size',
                    name='IRIS Dataset',
                    units='cm')
    new.history = history
    return new


def download_IRIS():
    """
    Download the classical IRIS dataset.

    The IRIS dataset is a classical example for machine learning. It is
    downloaded from the
    [UCI distant repository](https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data).
    If the download fails (connection problem or HTTP error status), the
    copy shipped with scikit-learn is used instead, when scikit-learn is
    installed.

    Returns
    -------
    downloaded
        The IRIS dataset.

    Raises
    ------
    IOError
        If the downloaded content cannot be parsed, or if the download
        failed and scikit-learn is not installed.

    See Also
    --------
    read : To read data from experimental data.

    Examples
    --------
    Download a dataset from a distant server

    >>> import spectrochempy as scp
    >>> dataset = scp.download_IRIS()
    """
    url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"

    # Best-effort download: any failure is logged and triggers the fallback
    try:
        response = requests.get(url, timeout=10)
        # an HTTP error page must not be parsed as data
        response.raise_for_status()
        # decode the whole body at once rather than chunk by chunk
        txtdata = response.content.decode('utf8')
    except Exception as e:
        error_(e)
        txtdata = None

    if txtdata is not None:
        fil = StringIO(txtdata)
        try:
            # the first four columns are the measurements ...
            data = np.loadtxt(fil, delimiter=',', usecols=range(4))
            fil.seek(0)
            # ... and the fifth one is the species name
            labels = np.loadtxt(fil, delimiter=',', usecols=(4,), dtype='|S')
            labels = [lab.decode("utf8") for lab in labels]
        except Exception as err:
            raise IOError(
                f'{url} is not a .csv file or its structure cannot be recognized'
            ) from err

        return _make_iris_dataset(
            data, labels, 'Loaded from UC Irvine machine learning repository')

    # Cannot download - use the scikit-learn dataset (if scikit-learn is
    # installed)
    try:
        from sklearn import datasets
    except ImportError as err:
        raise IOError('Failed in uploading the IRIS dataset!') from err

    iris = datasets.load_iris()
    labels = [iris.target_names[i] for i in iris.target]
    return _make_iris_dataset(iris.data, labels,
                              'Loaded from scikit-learn datasets')