def _prep_toilim_avg(self):
    """
    Set up averaging data across trials given `toilim` selection

    Parameters
    ----------
    self : Syncopy data object
        Input object that is being processed by the respective
        :func:`~syncopy.singlepanelplot` or :func:`~syncopy.multipanelplot`
        function/class method.

    Returns
    -------
    tLengths : 1D :class:`numpy.ndarray`
        Array of length `nSelectedTrials` with each element encoding the
        number of samples contained in the provided `toilim` selection.

    Notes
    -----
    Every entry of the time-selection has to be a `slice`, otherwise a
    `SPYError` is raised. If `tLengths` contains more than one unique element
    or the (common) selection length is smaller than two samples, a
    :class:`~syncopy.shared.errors.SPYValueError` is raised.

    Note further, that this is an auxiliary method that is intended purely for
    internal use. Please refer to the user-exposed methods
    :func:`~syncopy.singlepanelplot` and/or :func:`~syncopy.multipanelplot`
    to actually generate plots of Syncopy data objects.

    See also
    --------
    :func:`~syncopy.singlepanelplot` : visualize Syncopy objects using single-panel figure(s)
    :func:`~syncopy.multipanelplot` : visualize Syncopy objects using multi-panel figure(s)
    """

    selection = self._selection
    tLengths = np.zeros((len(selection.trials), ), dtype=np.intp)

    for trlIdx, timeSel in enumerate(selection.time):
        if not isinstance(timeSel, slice):
            raise SPYError("Cannot average `toilim` selection. "
                           "Please check `.time` property for consistency. ")
        first = 0 if timeSel.start is None else timeSel.start
        last = timeSel.stop
        if last is None:
            # Open-ended slice: ask for the unrestricted time-selection of
            # this trial and use its upper bound
            last = self._get_time([selection.trials[trlIdx]],
                                  toilim=[-np.inf, np.inf])[0].stop
        tLengths[trlIdx] = last - first

    # Averaging requires all selected segments to have identical length ...
    if np.unique(tLengths).size > 1:
        raise SPYValueError(
            legal="time-selections of equal length for averaging across trials",
            varname="toilim",
            actual="time-selections of varying length")

    # ... and to comprise at least two samples
    if tLengths[0] < 2:
        raise SPYValueError(
            legal="time-selections containing at least two samples",
            varname="toilim",
            actual="time-selections containing fewer than two samples")

    return tLengths
def data_parser(data, varname="", dataclass=None, writable=None, empty=None, dimord=None):
    """
    Validate a Syncopy data object

    Parameters
    ----------
    data : object
        Object to verify; its class hierarchy has to contain a class whose
        string representation includes "BaseData"
    varname : str
        Local variable name used in caller (shown in error messages)
    dataclass : None or str
        If not `None`, the class name of `data` has to be contained in
        ``str(dataclass)``
    writable : None or bool
        If `True`, `data.mode` must not be "r"; if `False`, `data.mode` has
        to be "r". If `None`, the access mode is not checked.
    empty : None or bool
        If `True`, `data` has to be empty; if `False`, `data` has to contain
        something (as reported by ``data._is_empty()``). If `None`, emptiness
        is not checked.
    dimord : None or list
        If not `None`, `data.dimord` has to compare equal to `dimord`

    Returns
    -------
    Nothing : None

    Raises
    ------
    SPYTypeError
        If `data` is not a Syncopy data object or not of the requested class
    SPYValueError
        If any of the requested emptiness, access or `dimord` checks fails
    """

    # Anything not derived from `BaseData` is rejected right away
    if not any("BaseData" in str(parent) for parent in data.__class__.__mro__):
        raise SPYTypeError(data, varname=varname, expected="Syncopy data object")

    # Optional: class name of `data` has to show up in `dataclass`
    if dataclass is not None and data.__class__.__name__ not in str(dataclass):
        raise SPYTypeError(data, varname=varname,
                           expected="Syncopy {} object".format(dataclass))

    # Optional: object has to be empty/non-empty
    if empty is not None:
        isEmpty = data._is_empty()
        if bool(empty) != bool(isEmpty):
            wanted, found = ("empty", "non-empty") if empty else ("non-empty", "empty")
            raise SPYValueError(
                legal="{status:s} Syncopy data object".format(status=wanted),
                varname=varname,
                actual=found)

    # Optional: object has to be writable/read-only
    if writable is not None:
        if writable and data.mode == "r":
            access = "write-access"
        elif not writable and data.mode != "r":
            access = "read-only-access"
        else:
            access = None
        if access is not None:
            raise SPYValueError(
                legal="{access:s} to Syncopy data object".format(access=access),
                varname=varname,
                actual="mode = {mode:s}".format(mode=data.mode))

    # Optional: dimensional layout has to match
    if dimord is not None and data.dimord != dimord:
        template = "Syncopy {diminfo:s} data object"
        wanted = "'" + "' x '".join(str(dim) for dim in dimord) + "'"
        if data.dimord:
            found = "'" + "' x '".join(str(dim) for dim in data.dimord) + "' "
        else:
            found = "empty"
        raise SPYValueError(legal=template.format(diminfo=wanted),
                            varname=varname,
                            actual=template.format(diminfo=found))

    return
def pre_check(self):
    '''
    Ensure the input consists of exactly one trial (i.e., a trial average).

    Can only be performed after initialization, i.e., once `self.numTrials`
    has been set.

    Raises
    ------
    SPYValueError
        If `self.numTrials` is `None` (routine not initialized) or
        differs from 1.
    '''

    nTrials = self.numTrials

    if nTrials is None:
        raise SPYValueError(legal='Initialize the computational Routine first!',
                            varname=self.__class__.__name__,
                            actual='ComputationalRoutine not initialized!')

    if nTrials != 1:
        raise SPYValueError(
            legal="1 trial: Granger causality can only be computed on trial averages!",
            varname="data",
            actual=f"DataSet contains {nTrials} trials")
def data(self):
    """array-like object representing data without trials

    Trials are concatenated along the time axis.

    If the underlying object exposes an `id` attribute (as HDF5-backed
    containers do) whose `valid` flag equals 0, a `SPYValueError` is raised
    since the backing file has been closed.
    """

    handle = getattr(self._data, "id", None)
    if handle is not None and handle.valid == 0:
        raise SPYValueError(
            legal="open HDF5 file",
            actual="backing HDF5 file {} has been closed".format(self.filename),
            varname="data")
    return self._data
def channel(self, chan):
    """
    Assign channel labels

    `None` resets the labels. Otherwise `chan` has to be an array-like of
    strings; duplicate labels are dropped (keeping the first occurrence and
    the original order) and the number of remaining labels has to match the
    number of unique entries in the "channel" column of `self.data`.

    Raises
    ------
    SPYValueError
        If no data has been assigned yet or if the number of labels does not
        match the number of channels found in the data. Any error raised by
        `array_parser` is passed on unchanged.
    """

    if chan is None:
        self._channel = None
        return

    if self.data is None:
        raise SPYValueError("Syncopy: Cannot assign `channels` without data. "
                            "Please assign data first")

    array_parser(chan, varname="channel", ntype="str")

    # Remove duplicate entries from channel array but preserve original order
    # (e.g., `[2, 0, 0, 1]` -> `[2, 0, 1]`); allows for complex subset-selections
    _, firstSeen = np.unique(chan, return_index=True)
    labels = np.array(chan)[np.sort(firstSeen)]

    nchan = np.unique(self.data[:, self.dimord.index("channel")]).size
    if labels.size != nchan:
        raise SPYValueError(
            legal="channel label array of length {0:d}".format(nchan),
            varname="channel",
            actual="array of length {0:d}".format(labels.size))

    self._channel = labels
def unit(self, unit):
    """
    Assign unit labels

    `None` resets the labels. Otherwise `unit` has to be an array-like of
    strings whose length equals the number of unique entries in the "unit"
    column of `self.data`.

    Raises
    ------
    SPYValueError
        If no data has been assigned yet. Any error raised by `array_parser`
        is passed on unchanged.
    """

    if unit is None:
        self._unit = None
        return

    if self.data is None:
        raise SPYValueError("Syncopy - SpikeData - unit: Cannot assign `unit` without data. "
                            "Please assign data first")

    # One label per distinct unit found in the data
    unitColumn = self.data[:, self.dimord.index("unit")]
    nunit = np.unique(unitColumn).size

    array_parser(unit, varname="unit", ntype="str", dims=(nunit,))

    self._unit = np.array(unit)
def validate_padding(pad_to_length, lenTrials):
    """
    Validate `pad_to_length` and derive the absolute signal length after padding

    Parameters
    ----------
    pad_to_length : None, str or int-like number
        * number: absolute length (in samples) all trials are padded to; has
          to be integer-like and not smaller than the longest trial
          (checked by `scalar_parser`)
        * 'nextpow2': pad to the next power of two (as computed by
          `_nextpow2`); only honored if all trials have equal length
        * `None`: no user-defined padding
    lenTrials : :class:`numpy.ndarray`
        Lengths (in samples) of all trials; only its `min()` and `max()`
        are used

    Returns
    -------
    abs_pad : int-like
        The (soon to be padded) signal length in samples

    Raises
    ------
    SPYValueError
        If `pad_to_length` is a Boolean, of unsupported type or a string not
        contained in `availablePaddingOpt`. Errors raised by `scalar_parser`
        are passed on unchanged.

    Notes
    -----
    If trials differ in length and no absolute padding length is given, all
    trials are padded to the length of the longest trial and a `SPYWarning`
    is issued; a 'nextpow2' request is overruled in this case. Previously the
    'nextpow2' branch silently replaced this value by the next power of two
    of the *shortest* trial, which could be shorter than the longest trial
    and contradicted the emitted warning.
    """

    # `bool` is an `int` subclass (and thus a `Number`): reject it explicitly
    supported = (
        not isinstance(pad_to_length, bool)
        and isinstance(pad_to_length, (Number, str, type(None)))
        and not (isinstance(pad_to_length, str)
                 and pad_to_length not in availablePaddingOpt)
    )
    if not supported:
        raise SPYValueError(legal="`None`, 'nextpow2' or an integer like number",
                            varname="pad_to_length",
                            actual=f"{pad_to_length}")

    # Zero padding of ALL trials the same way: user-provided absolute length
    if isinstance(pad_to_length, Number):
        scalar_parser(pad_to_length,
                      varname='pad_to_length',
                      ntype='int_like',
                      lims=[lenTrials.max(), np.inf])
        return pad_to_length

    max_len = int(lenTrials.max())

    # Unequal trial lengths without absolute padding length: rough 'maxlen'
    # padding, this overrules 'nextpow2'
    if lenTrials.min() != lenTrials.max():
        SPYWarning(f"Unequal trial lengths present, automatic padding to {max_len} samples")
        return max_len

    # Equal length trials: pad to optimal FFT length if requested ...
    if pad_to_length == 'nextpow2':
        return _nextpow2(max_len)

    # ... otherwise no padding at all
    return max_len
def channel(self, channel):
    """
    Assign channel labels

    `None` resets the labels. Otherwise `channel` has to be an array-like of
    strings with as many entries as `self.avg` has elements along its
    "channel" dimension.

    Raises
    ------
    SPYValueError
        If `self.avg` is `None`. Any error raised by `array_parser` is passed
        on unchanged.
    """

    if channel is None:
        self._channel = None
        return

    if self.avg is None:
        raise SPYValueError(
            "Syncopy: Cannot assign `channels` without data. "
            "Please assign data first")

    # Expected label count = extent of the channel axis
    nChannels = self.avg.shape[self.dimord.index("channel")]
    array_parser(channel, varname="channel", ntype="str", dims=(nChannels, ))

    self._channel = np.array(channel)
def channel_j(self, channel_j):
    """
    Assign labels of the `channel_j` dimension

    `None` resets the labels. Otherwise `channel_j` has to be an array-like
    of strings with as many entries as `self.data` has elements along its
    "channel_j" dimension; the labels are stored as :class:`numpy.ndarray`.

    Raises
    ------
    SPYValueError
        If no data has been assigned yet. Any error raised by `array_parser`
        is passed on unchanged.
    """

    if channel_j is None:
        self._channel_j = None
        return

    if self.data is None:
        raise SPYValueError(
            "Syncopy: Cannot assign `channels` without data. "
            "Please assign data first")

    # Expected label count = extent of the `channel_j` axis
    nLabels = self.data.shape[self.dimord.index("channel_j")]
    array_parser(channel_j, varname="channel_j", ntype="str", dims=(nLabels, ))

    self._channel_j = np.array(channel_j)
def _sizeof(self, obj):
    """
    Estimate memory consumption of Python objects

    Parameters
    ----------
    obj : Python object
        Any valid Python object whose memory footprint is of interest.

    Returns
    -------
    objsize : float
        Approximate memory footprint of `obj` in megabytes (MB).

    Raises
    ------
    SPYValueError
        If the call counter `self._callCount` reaches `self._callMax`

    Notes
    -----
    Memory consumption is estimated by recursively calling
    :meth:`sys.getsizeof` on `obj` and (for dictionaries, lists, tuples and
    sets) on its elements. Every invocation increments `self._callCount`;
    the preset limit `self._callMax` protects against circular object
    references. This method was inspired by a routine in
    `Nifty <https://github.com/mwojnars/nifty/blob/master/util.py>`_.
    """

    # Count this call and bail out once the preset limit has been reached
    self._callCount += 1
    if self._callCount >= self._callMax:
        raise SPYValueError(
            legal="minimally nested positional arguments",
            varname="argv",
            actual="argument with nesting depth >= {}".format(self._callMax))

    # Shallow footprint of `obj` itself (bytes -> MB)
    footprint = sys.getsizeof(obj) / 1024**2

    # Containers: add footprint of contents (keys first, then values)
    if isinstance(obj, dict):
        keyTotal = sum(self._sizeof(key) for key in obj.keys())
        valTotal = sum(self._sizeof(val) for val in obj.values())
        return footprint + keyTotal + valTotal

    if isinstance(obj, (list, tuple, set)):
        return footprint + sum(self._sizeof(item) for item in obj)

    return footprint
def filename_parser(filename, is_in_valid_container=None):
    r"""Extract information from Syncopy file and folder names

    Parameters
    ----------
    filename : str or None
        Syncopy data file (\*.<dataclass>), Syncopy info file
        (\*.<dataclass>.info) or Syncopy container folder (\*.spy). If
        `None`, a dictionary with all values set to `None` is returned.
    is_in_valid_container : bool
        If `True`, the `filename` must be inside a folder with a .spy
        extension.
        If `False`, `filename` must not be inside a .spy folder.
        If `None`, the extension of the parent folder is not checked.

    Returns
    -------
    fileinfo : dict
        Information extracted from filename and foldername with keys
        ['filename', 'container', 'folder', 'tag', 'basename', 'extension'].

    Raises
    ------
    SPYValueError
        If `filename` contains more than two dots, a container name contains
        additional dots, the extension is unknown, the parent folder
        violates `is_in_valid_container` or a file inside a container does
        not start with the container's base name.

    Notes
    -----
    `filename` is expanded (`~`) and converted to an absolute path before
    parsing; the file system itself is not accessed.

    Examples
    --------
    >>> filename_parser('/home/user/monkeyB_20190709_rfmapping_1_amua-stimon.analog')
    {'filename': 'monkeyB_20190709_rfmapping_1_amua-stimon.analog',
     'container': None,
     'folder': '/home/user',
     'tag': None,
     'basename': 'monkeyB_20190709_rfmapping_1_amua-stimon',
     'extension': '.analog'}

    >>> filename_parser('/home/user/monkeyB_20190709_rfmapping_1_amua-stimon.analog.info')
    {'filename': 'monkeyB_20190709_rfmapping_1_amua-stimon.analog',
     'container': None,
     'folder': '/home/user',
     'tag': None,
     'basename': 'monkeyB_20190709_rfmapping_1_amua-stimon',
     'extension': '.analog'}

    >>> filename_parser('session_1.spy/session_1_amua-stimon.analog')
    {'filename': 'session_1_amua-stimon.analog',
     'container': 'session_1.spy',
     'folder': '/home/user/session_1.spy',
     'tag': 'amua-stimon',
     'basename': 'session_1',
     'extension': '.analog'}

    >>> filename_parser('session_1.spy')
    {'filename': None,
     'container': 'session_1.spy',
     'folder': '/home/user',
     'tag': None,
     'basename': 'session_1',
     'extension': '.spy'}

    See also
    --------
    io_parser : check file and folder names for existence
    """

    if filename is None:
        return {
            "filename": None,
            "container": None,
            "folder": None,
            "tag": None,
            "basename": None,
            "extension": None
        }

    # Split absolute path into parent folder, file name and extension
    filename = os.path.abspath(os.path.expanduser(filename))
    folder, filename = os.path.split(filename)
    container = folder.split(os.path.sep)[-1]
    basename, ext = os.path.splitext(filename)

    # At most two dots are allowed (`<name>.<dataclass>.info`)
    if filename.count(".") > 2:
        raise SPYValueError(legal="single extension, found {}".format(filename.count(".")),
                            actual=filename, varname="filename")
    if ext == FILE_EXT["dir"] and basename.count(".") > 0:
        raise SPYValueError(legal="no extension, found {}".format(basename.count(".")),
                            actual=basename, varname="container")

    if ext == FILE_EXT["info"]:
        # Info file: strip info-extension to obtain name of the data file
        filename = basename
        basename, ext = os.path.splitext(filename)
    elif ext == FILE_EXT["dir"]:
        # Container folder: nothing else to parse
        return {
            "filename": None,
            "container": filename,
            "folder": folder,
            "tag": None,
            "basename": basename,
            "extension": ext
        }

    if ext not in FILE_EXT["data"] + (FILE_EXT["dir"],):
        raise SPYValueError(legal=FILE_EXT["data"],
                            actual=ext, varname="filename extension")

    # If requested, ensure file is (not) located inside a container folder
    containerExt = os.path.splitext(container)[1]
    folderExtIsSpy = containerExt == FILE_EXT["dir"]
    if is_in_valid_container is not None:
        if not folderExtIsSpy and is_in_valid_container:
            raise SPYValueError(legal=FILE_EXT["dir"],
                                actual=containerExt,
                                varname="folder extension")
        elif folderExtIsSpy and not is_in_valid_container:
            raise SPYValueError(legal='not ' + FILE_EXT["dir"],
                                actual=containerExt,
                                varname="folder extension")

    if folderExtIsSpy:
        # Files inside a container are named `<containerBasename>[_<tag>]`
        containerBasename = os.path.splitext(container)[0]
        if not basename.startswith(containerBasename):
            raise SPYValueError(legal=containerBasename,
                                actual=filename,
                                varname='start of filename')
        tag = basename.partition(containerBasename)[-1]
        if tag == "":
            tag = None
        else:
            if tag[0] == '_':
                tag = tag[1:]
        basename = containerBasename
    else:
        container = None
        tag = None

    return {
        "filename": filename,
        "container": container,
        "folder": folder,
        "tag": tag,
        "basename": basename,
        "extension": ext
    }
def array_parser(var, varname="", ntype=None, hasinf=None, hasnan=None,
                 lims=None, dims=None, issorted=None):
    """
    Parse array-like objects

    Parameters
    ----------
    var : array_like
        Array object to verify (has to be a list or :class:`numpy.ndarray`)
    varname : str
        Local variable name used in caller, see Examples for details.
    ntype : None or str
        Expected data type of `var`. Possible options are any valid builtin
        type, all NumPy dtypes as well as `"numeric"` (a catch-all to ensure
        `var` only contains numeric elements) and `"int_like"` (all elements
        of `var` are expected to have no significant digits after the decimal
        point, e.g., 3.0, -12.0 etc.). If `ntype` is `None` the data type of
        `var` is not checked.
    hasinf : None or bool
        If `hasinf` is `False` the input array `var` is considered invalid
        if it contains non-finite elements (`np.inf`), vice-versa if `hasinf`
        is `True`. If `hasinf` is `None` elements of `var` are not probed
        for finiteness.
    hasnan : None or bool
        If `hasnan` is `False` the input array `var` is considered invalid
        if it contains undefined elements (`np.nan`), vice-versa if `hasnan`
        is `True`. If `hasnan` is `None` elements of `var` are not probed
        for well-posedness.
    lims : None or two-element list_like
        Lower (`lims[0]`) and upper (`lims[1]`) bounds for legal values of
        `var`'s elements. Note that the code checks for non-strict inequality,
        i.e., `var[i] = lims[0]` or `var[i] = lims[1]` are both considered
        to be valid elements of `var`.
        For complex arrays bounds-checking is performed on both real and
        imaginary parts of each component of `var`. That is, all elements of
        `var` have to satisfy `lims[0] <= var[i].real <= lims[1]` as well as
        `lims[0] <= var[i].imag <= lims[1]` (see Examples for details).
        Note that `np.inf` and `np.nan` entries are ignored during bounds-
        checking (an array without any finite entries thus always passes).
        Use the keywords `hasinf` and `hasnan` to probe an array for
        infinite and non-numeric entries, respectively.
        If `lims` is `None` bounds-checking is not performed.
    dims : None or int or tuple
        Expected number of dimensions (if `dims` is an integer) or shape
        (if `dims` is a tuple) of `var`. By default, singleton dimensions
        of `var` are ignored if `dims` is a tuple, i.e., for `dims = (10, )`
        an array `var` with `var.shape = (10, 1)` is considered valid. However,
        if singleton dimensions are explicitly queried by setting `dims = (10, 1)`
        any array `var` with `var.shape = (10, )` or `var.shape = (1, 10)` is
        considered invalid.
        Unknown dimensions can be represented as `None`, i.e., for
        `dims = (10, None)` arrays with shape `(10, 1)`, `(10, 100)` or
        `(10, 0)` are all considered valid, however, any 1d-array (e.g.,
        `var.shape = (10,)`) is invalid.
        If `dims` is an integer, `var.ndim` has to match `dims` exactly, i.e.,
        any array `var` with `var.shape = (10, )` is considered invalid if
        `dims = 2` and conversely, `dims = 1` and `var.shape = (10, 1)`
        triggers an exception.
    issorted : None or bool
        If `issorted` is `True`, `var` is expected to be a 1d-array (or
        2d-array with a single singleton-dimension, i.e., a row- or
        column-vector) with elements in (strictly) ascending order.
        Conversely, if `issorted` is `False`, `var` is considered invalid if
        its elements are ordered by magnitude. If `issorted` is `None`,
        order of array elements is not inspected.

    Returns
    -------
    Nothing : None

    Raises
    ------
    SPYTypeError
        If `var` is neither a list nor a :class:`numpy.ndarray`
    SPYValueError
        If `var` violates any of the requested properties

    Examples
    --------
    Assume `time` is supposed to be a 1d-array with floating point components
    bounded by 0 and 10. The following calls confirm the validity of `time`

    >>> time = np.linspace(0, 10, 100)
    >>> array_parser(time, varname="time", lims=[0, 10], dims=1)
    >>> array_parser(time, varname="time", lims=[0, 10], dims=(100,))

    Ensure additionally that all elements of `time` are ordered by magnitude

    >>> array_parser(time, varname="time", lims=[0, 10], dims=(100,), issorted=True)

    Artificially appending a singleton dimension to `time` does not affect
    parsing:

    >>> time = time[:,np.newaxis]
    >>> time.shape
    (100, 1)
    >>> array_parser(time, varname="time", lims=[0, 10], dims=(100,), issorted=True)

    However, explicitly querying for a row-vector fails

    >>> array_parser(time, varname="time", lims=[0, 10], dims=(1,100))

    Complex arrays are parsed analogously:

    >>> spec = np.array([complex(2,3), complex(2,-2)])
    >>> array_parser(spec, varname="spec", dims=1)
    >>> array_parser(spec, varname="spec", dims=(2,))

    Note that bounds-checking is performed component-wise on both real and
    imaginary parts:

    >>> array_parser(spec, varname="spec", lims=[-3, 5])    # valid
    >>> array_parser(spec, varname="spec", lims=[-1, 5])    # invalid since spec[1].imag < lims[0]

    However, complex numbers do not admit an order relationship:

    >>> array_parser(spec, varname="spec", lims=[-3, 5], issorted=True)  # invalid

    Character lists can be parsed as well:

    >>> channels = ["channel1", "channel2", "channel3"]
    >>> array_parser(channels, varname="channels", dims=1)
    >>> array_parser(channels, varname="channels", dims=(3,))

    See also
    --------
    scalar_parser : similar functionality for parsing numeric scalars
    """

    # Make sure `var` is array-like and convert it to ndarray to simplify parsing
    if not isinstance(var, (np.ndarray, list)):
        raise SPYTypeError(var, varname=varname, expected="array_like")
    arr = np.array(var)

    # If bounds-checking is requested but `ntype` is not set, use the
    # generic "numeric" option to ensure array is actually numeric
    if (lims is not None or hasnan is not None or hasinf is not None) and ntype is None:
        ntype = "numeric"

    # If array-element order parsing is requested but `ntype` and/or `dims` are
    # not set, use sane defaults to ensure array is numeric and one-dimensional
    if issorted is not None:
        if ntype is None:
            ntype = "numeric"
        if dims is None:
            dims = (None, )

    # If required, parse type (handle "int_like" and "numeric" separately)
    if ntype is not None:
        msg = "dtype = {dt:s}"
        if ntype in ["numeric", "int_like"]:
            if not np.issubdtype(arr.dtype, np.number):
                raise SPYValueError(msg.format(dt="numeric"), varname=varname,
                                    actual=msg.format(dt=str(arr.dtype)))
            if ntype == "int_like":
                # vectorized check for vanishing fractional parts
                if not np.all(np.round(arr) == arr):
                    raise SPYValueError(msg.format(dt=ntype), varname=varname)
        else:
            if not np.issubdtype(arr.dtype, np.dtype(ntype).type):
                raise SPYValueError(msg.format(dt=ntype), varname=varname,
                                    actual=msg.format(dt=str(arr.dtype)))

    # If required, parse finiteness of array-elements
    if hasinf is not None:
        if not hasinf and np.isinf(arr).any():
            lgl = "finite numerical array"
            act = "array with {} `inf` entries".format(str(np.isinf(arr).sum()))
            raise SPYValueError(legal=lgl, varname=varname, actual=act)
        if hasinf and not np.isinf(arr).any():
            lgl = "numerical array with infinite (`np.inf`) entries"
            act = "finite numerical array"
            raise SPYValueError(legal=lgl, varname=varname, actual=act)

    # If required, parse well-posedness of array-elements
    if hasnan is not None:
        if not hasnan and np.isnan(arr).any():
            lgl = "well-defined numerical array"
            act = "array with {} `NaN` entries".format(str(np.isnan(arr).sum()))
            raise SPYValueError(legal=lgl, varname=varname, actual=act)
        if hasnan and not np.isnan(arr).any():
            lgl = "numerical array with undefined (`np.nan`) entries"
            act = "well-defined numerical array"
            raise SPYValueError(legal=lgl, varname=varname, actual=act)

    # If required perform component-wise bounds-check (remove NaN's and Inf's first)
    if lims is not None:
        fi_arr = arr[np.isfinite(arr)]
        # Without any finite entries there is nothing to compare against the
        # bounds (`min`/`max` of an empty array would raise a `ValueError`)
        if fi_arr.size > 0:
            # `np.complexfloating` covers all complex dtypes (not only complex128)
            if np.issubdtype(fi_arr.dtype, np.complexfloating):
                amin = min(fi_arr.real.min(), fi_arr.imag.min())
                amax = max(fi_arr.real.max(), fi_arr.imag.max())
            else:
                amin = fi_arr.min()
                amax = fi_arr.max()
            if amin < lims[0] or amax > lims[1]:
                legal = "all array elements to be bounded by {lb:s} and {ub:s}"
                raise SPYValueError(legal.format(lb=str(lims[0]), ub=str(lims[1])),
                                    varname=varname)

    # If required parse dimensional layout of array
    if dims is not None:

        # Account for the special case of 1d character arrays (that
        # collapse to 0d-arrays when squeezed)
        ischar = int(np.issubdtype(arr.dtype, np.dtype("str").type))

        # Compare shape or dimension number
        if isinstance(dims, tuple):
            if len(dims) > 1:
                ashape = arr.shape
            else:
                if arr.size == 1:
                    ashape = arr.shape
                else:
                    # tuple comparison: `(ischar,)` only wins if squeezing
                    # collapsed the array to zero dimensions
                    ashape = max((ischar,), arr.squeeze().shape)
            if len(dims) != len(ashape):
                msg = "{}-dimensional array"
                raise SPYValueError(legal=msg.format(len(dims)), varname=varname,
                                    actual=msg.format(len(ashape)))
            for dk, dim in enumerate(dims):
                if dim is not None and ashape[dk] != dim:
                    raise SPYValueError("array of shape " + str(dims),
                                        varname=varname, actual="shape = " + str(arr.shape))
        else:
            ndim = max(ischar, arr.ndim)
            if ndim != dims:
                raise SPYValueError(str(dims) + "d-array", varname=varname,
                                    actual=str(ndim) + "d-array")

    # If required check if array elements are ordered by magnitude
    if issorted is not None:
        if not np.all(np.isreal(arr)):
            lgl = "real-valued array"
            act = "array containing complex elements"
            raise SPYValueError(legal=lgl, varname=varname, actual=act)
        if arr.size <= 1:
            lgl = "array with at least two elements"
            act = "array containing (fewer than) one element"
            raise SPYValueError(legal=lgl, varname=varname, actual=act)
        ascending = np.diff(arr.flatten()).min() > 0
        if issorted and not ascending:
            lgl = "array with elements in ascending order"
            act = "unsorted array"
            raise SPYValueError(legal=lgl, varname=varname, actual=act)
        if not issorted and ascending:
            lgl = "unsorted array"
            act = "array with elements in ascending order"
            raise SPYValueError(legal=lgl, varname=varname, actual=act)

    return
def padding(data, padtype, pad="absolute", padlength=None, prepadlength=None, postpadlength=None, unit="samples", create_new=True): """ Perform data padding on Syncopy object or :class:`numpy.ndarray` **Usage Summary** Depending on the value of `pad` the following padding length specifications are supported: +------------+----------------------+---------------+----------------------+----------------------+ | `pad` | `data` | `padlength` | `prepadlength` | `postpadlength` | +============+======================+===============+======================+======================+ | 'absolute' | Syncopy object/array | number | `None`/`bool` | `None`/`bool` | +------------+----------------------+---------------+----------------------+----------------------+ | 'relative' | Syncopy object/array | number/`None` | number/`None`/`bool` | number/`None`/`bool` | +------------+----------------------+---------------+----------------------+----------------------+ | 'maxlen' | Syncopy object | `None`/`bool` | `None`/`bool` | `None`/`bool` | +------------+----------------------+---------------+----------------------+----------------------+ | 'nextpow2' | Syncopy object/array | `None`/`bool` | `None`/`bool` | `None`/`bool` | +------------+----------------------+---------------+----------------------+----------------------+ * `data` can be either a Syncopy object containing multiple trials or a :class:`numpy.ndarray` representing a single trial * (pre/post)padlength: can be either `None`, `True`/`False` or a positive number: if `True` indicates where to pad, e.g., by using ``pad = 'maxlen'`` and ``prepadlength = True``, `data` is padded at the beginning of each trial. **Only** if `pad` is 'relative' are scalar values supported for `prepadlength` and `postpadlength` * ``pad = 'absolute'``: pad to desired absolute length, e.g., by using ``pad = 5`` and ``unit = 'time'`` all trials are (if necessary) padded to 5s length. 
Here, `padlength` **has** to be provided, `prepadlength` and `postpadlength` can be `None` or `True`/`False` * ``pad = 'relative'``: pad by provided `padlength`, e.g., by using ``padlength = 20`` and ``unit = 'samples'``, 20 samples are padded symmetrically around (before and after) each trial. Use ``padlength = 20`` and ``prepadlength = True`` **or** directly ``prepadlength = 20`` to pad before each trial. Here, at least one of `padlength`, `prepadlength` or `postpadlength` **has** to be provided. * ``pad = 'maxlen'``: (only valid for **Syncopy objects**) pad up to maximal trial length found in `data`. All lengths have to be either Boolean indicating padding location or `None` (if all are `None`, symmetric padding is performed) * ``pad = 'nextpow2'``: pad each trial up to closest power of two. All lengths have to be either Boolean indicating padding location or `None` (if all are `None`, symmetric padding is performed) Full documentation below. Parameters ---------- data : Syncopy object or :class:`numpy.ndarray` Non-empty Syncopy data object or array representing numeric data to be padded. **NOTE**: if `data` is a :class:`numpy.ndarray`, it is assumed that it represents recordings from only a single trial, where its first axis corresponds to time. In other words, `data` is a 'time'-by-'channel' array such that its rows reflect samples and its columns represent channels. If `data` is a Syncopy object, trial information and dimensional order are fetched from `data.trials` and `data.dimord`, respectively. padtype : str Padding value(s) to be used. Available options are: * 'zero' : pad using zeros * 'nan' : pad using `np.nan`'s * 'mean' : pad with by-channel mean value across each trial * 'localmean' : pad with by-channel mean value using only `padlength` or `prepadlength`/`postpadlength` number of boundary-entries for averaging * 'edge' : pad with trial-boundary values * 'mirror' : pad with reflections of trial-boundary values pad : str Padding mode to be used. 
Available options are: * 'absolute' : pad each trial to achieve a desired absolute length such that all trials have identical length post padding. If `pad` is `absolute` a `padlength` **has** to be provided, `prepadlength` and `postpadlength` may be `True` or `False`, respectively (see Examples for details). * 'relative' : pad each trial by provided `padlength` such that all trials are extended by the same amount regardless of their original lengths. If `pad` is `relative`, `prepadlength` and `postpadlength` can either be specified directly (using numerical values) or implicitly by only providing `padlength` and setting `prepadlength` and `postpadlength` to `True` or `False`, respectively (see Examples for details). If `pad` is `relative` at least one of `padlength`, `prepadlength` or `postpadlength` **has** to be provided. * 'maxlen' : only usable if `data` is a Syncopy object. If `pad` is 'maxlen' all trials are padded to achieve the length of the longest trial in `data`, i.e., post padding, all trials have the same length, which equals the size of the longest trial pre-padding. For ``pad = 'maxlen'``, `padlength`, `prepadlength` as well as `postpadlength` have to be either Boolean or `None` indicating the preferred padding location (pre-trial, post-trial or symmetrically pre- and post-trial). If all are `None`, symmetric padding is performed (see Examples for details). * 'nextpow2' : pad each trial to achieve a length equals the closest power of two of its original length. For ``pad = 'nextpow2'``, `padlength`, `prepadlength` as well as `postpadlength` have to be either Boolean or `None` indicating the preferred padding location (pre-trial, post-trial or symmetrically pre- and post-trial). If all are `None`, symmetric padding is performed (see Examples for details). padlength : None, bool or positive scalar Length to be padded to `data` (if `padlength` is scalar-valued) or padding location (if `padlength` is Boolean). 
Depending on the value of `pad`, `padlength` can be used to pre-pend (if `padlength` is a positive number and `prepadlength` is `True`) or append trials (if `padlength` is a positive number and `postpadlength` is `True`). If neither `prepadlength` nor `postpadlength` are specified (i.e, both are `None`), symmetric pre- and post-trial padding is performed (i.e., ``0.5 * padlength`` before and after each trial - note that odd sample counts are rounded downward to the nearest even integer). If ``unit = 'time'``, `padlength` is assumed to be given in seconds, otherwise (``unit = 'samples'``), `padlength` is interpreted as sample-count. Note that only ``pad = 'relative'`` and ``pad = 'absolute'`` support numeric values of `padlength`. prepadlength : None, bool or positive scalar Length to be pre-pended before each trial (if `prepadlength` is scalar-valued) or pre-padding flag (if `prepadlength` is `True`). If `prepadlength` is `True`, pre-padding length is either directly inferred from `padlength` or implicitly derived from chosen padding mode defined by `pad`. If ``unit = 'time'``, `prepadlength` is assumed to be given in seconds, otherwise (``unit = 'samples'``), `prepadlength` is interpreted as sample-count. Note that only ``pad = 'relative'`` supports numeric values of `prepadlength`. postpadlength : None, bool or positive scalar Length to be appended after each trial (if `postpadlength` is scalar-valued) or post-padding flag (if `postpadlength` is `True`). If `postpadlength` is `True`, post-padding length is either directly inferred from `padlength` or implicitly derived from chosen padding mode defined by `pad`. If ``unit = 'time'``, `postpadlength` is assumed to be given in seconds, otherwise (``unit = 'samples'``), `postpadlength` is interpreted as sample-count. Note that only ``pad = 'relative'`` supports numeric values of `postpadlength`. unit : str Unit of numerical values given by `padlength` and/or `prepadlength` and/or `postpadlength`. 
If ``unit = 'time'``, `padlength`, `prepadlength`, and `postpadlength` are assumed to be given in seconds, otherwise (``unit = 'samples'``), `padlength`, `prepadlength`, and `postpadlength` are interpreted as sample-counts. **Note** Providing padding lengths in seconds (i.e., ``unit = 'time'``) is only supported if `data` is a Syncopy object. create_new : bool If `True`, a padded copy of the same type as `data` is returned (a :class:`numpy.ndarray` or Syncopy object). If `create_new` is `False`, either a single dictionary (if `data` is a :class:`numpy.ndarray`) or a ``len(data.trials)``-long list of dictionaries (if `data` is a Syncopy object) with all necessary options for performing the actual padding operation with :func:`numpy.pad` is returned. Returns ------- pad_dict : dict, if `data` is a :class:`numpy.ndarray` and ``create_new = False`` Dictionary whose items contain all necessary parameters for calling :func:`numpy.pad` to perform the desired padding operation on `data`. pad_dicts : list, if `data` is a Syncopy object and ``create_new = False`` List of dictionaries for calling :func:`numpy.pad` to perform the desired padding operation on all trials found in `data`. out : :class:`numpy.ndarray`, if `data` is a :class:`numpy.ndarray` and ``create_new = True`` Padded version (deep copy) of `data` out : Syncopy object, if `data` is a Syncopy object and ``create_new = True`` Padded version (deep copy) of `data` Notes ----- This method emulates (and extends) FieldTrip's `ft_preproc_padding` by providing a convenience wrapper for NumPy's :func:`numpy.pad` that performs the actual heavy lifting. Examples -------- Consider the following small array representing a toy-problem-trial of `ns` samples across `nc` channels: >>> nc = 7; ns = 30 >>> trl = np.random.randn(ns, nc) We start by padding a total of 10 zeros symmetrically to `trl` >>> padded = spy.padding(trl, 'zero', pad='relative', padlength=10) >>> padded[:6, :] array([[ 0. , 0. , 0. , 0. , 0. , 0. , 0. 
], [ 0. , 0. , 0. , 0. , 0. , 0. , 0. ], [ 0. , 0. , 0. , 0. , 0. , 0. , 0. ], [ 0. , 0. , 0. , 0. , 0. , 0. , 0. ], [ 0. , 0. , 0. , 0. , 0. , 0. , 0. ], [-1.0866, 2.3358, 0.8758, 0.5196, 0.8049, -0.659 , -0.9173]]) >>> padded[-6:, :] array([[ 0.027 , 1.8069, 1.5249, -0.7953, -0.8933, 1.0202, -0.6862], [ 0. , 0. , 0. , 0. , 0. , 0. , 0. ], [ 0. , 0. , 0. , 0. , 0. , 0. , 0. ], [ 0. , 0. , 0. , 0. , 0. , 0. , 0. ], [ 0. , 0. , 0. , 0. , 0. , 0. , 0. ], [ 0. , 0. , 0. , 0. , 0. , 0. , 0. ]]) >>> padded.shape (40, 7) Note that the above call is equivalent to >>> padded_ident = spy.padding(trl, 'zero', pad='relative', padlength=10, prepadlength=True, postpadlength=True) >>> np.array_equal(padded_ident, padded) True >>> padded_ident = spy.padding(trl, 'zero', pad='relative', prepadlength=5, postpadlength=5) >>> np.array_equal(padded_ident, padded) True Similarly, >>> prepad = spy.padding(trl, 'nan', pad='relative', prepadlength=10) is the same as >>> prepad_ident = spy.padding(trl, 'nan', pad='relative', padlength=10, prepadlength=True) >>> np.allclose(prepad, prepad_ident, equal_nan=True) True Define bogus trials on `trl` and create a dummy object with unit samplerate >>> tdf = np.vstack([np.arange(0, ns, 5), np.arange(5, ns + 5, 5), np.ones((int(ns / 5), )), np.ones((int(ns / 5), )) * np.pi]).T >>> adata = spy.AnalogData(trl, trialdefinition=tdf, samplerate=1) Pad each trial to the closest power of two by appending by-trial channel averages. 
However, do not perform actual padding, but only prepare dictionaries of parameters to be passed on to :func:`numpy.pad` >>> pad_dicts = spy.padding(adata, 'mean', pad='nextpow2', postpadlength=True, create_new=False) >>> len(pad_dicts) == len(adata.trials) True >>> pad_dicts[0] {'pad_width': array([[0, 3], [0, 0]]), 'mode': 'mean'} Similarly, the following call generates a list of dictionaries preparing absolute padding by prepending zeros with :func:`numpy.pad` >>> pad_dicts = spy.padding(adata, 'zero', pad='absolute', padlength=10, prepadlength=True, create_new=False) >>> pad_dicts[0] {'pad_width': array([[5, 0], [0, 0]]), 'mode': 'constant', 'constant_values': 0} See also -------- numpy.pad : fast array padding in NumPy """ # Detect whether input is data object or array-like if any(["BaseData" in str(base) for base in data.__class__.__mro__]): try: data_parser(data, varname="data", dataclass="AnalogData", empty=False) except Exception as exc: raise exc timeAxis = data.dimord.index("time") spydata = True elif data.__class__.__name__ == "FauxTrial": if len(data.shape) != 2: lgl = "two-dimensional AnalogData trial segment" act = "{}-dimensional trial segment" raise SPYValueError(legal=lgl, varname="data", actual=act.format(len(data.shape))) timeAxis = data.dimord.index("time") spydata = False else: try: array_parser(data, varname="data", dims=2) except Exception as exc: raise exc timeAxis = 0 spydata = False # FIXME: Creation of new spy-object currently not supported if not isinstance(create_new, bool): raise SPYTypeError(create_new, varname="create_new", expected="bool") if spydata and create_new: raise NotImplementedError( "Creation of padded spy objects currently not supported. 
") # Use FT-compatible options (sans FT option 'remove') if not isinstance(padtype, str): raise SPYTypeError(padtype, varname="padtype", expected="string") options = ["zero", "nan", "mean", "localmean", "edge", "mirror"] if padtype not in options: lgl = "'" + "or '".join(opt + "' " for opt in options) raise SPYValueError(legal=lgl, varname="padtype", actual=padtype) # Check `pad` and ensure we can actually perform the requested operation if not isinstance(pad, str): raise SPYTypeError(pad, varname="pad", expected="string") options = ["absolute", "relative", "maxlen", "nextpow2"] if pad not in options: lgl = "'" + "or '".join(opt + "' " for opt in options) raise SPYValueError(legal=lgl, varname="pad", actual=pad) if pad == "maxlen" and not spydata: lgl = "syncopy data object when using option 'maxlen'" raise SPYValueError(legal=lgl, varname="pad", actual="maxlen") # Make sure a data object was provided if we're working with time values if not isinstance(unit, str): raise SPYTypeError(unit, varname="unit", expected="string") options = ["samples", "time"] if unit not in options: lgl = "'" + "or '".join(opt + "' " for opt in options) raise SPYValueError(legal=lgl, varname="unit", actual=unit) if unit == "time" and not spydata: raise SPYValueError( legal="syncopy data object when using option 'time'", varname="unit", actual="time") # Set up dictionary for type-checking of provided padding lengths nt_dict = {"samples": "int_like", "time": None} # If we're padding up to an absolute bound or the max. 
length across # trials, compute lower bound for padding (in samples or seconds) if pad in ["absolute", "maxlen"]: if spydata: maxTrialLen = np.diff(data.sampleinfo).max() else: maxTrialLen = data.shape[ timeAxis] # if `pad="absolute" and data is array else: maxTrialLen = np.inf if unit == "time": padlim = maxTrialLen / data.samplerate else: padlim = maxTrialLen # To ease option processing, collect padding length keywords in dict plengths = { "padlength": padlength, "prepadlength": prepadlength, "postpadlength": postpadlength } # In case of relative padding, we need at least one scalar value to proceed if pad == "relative": # If `padlength = None`, pre- or post- need to be set; if `padlength` # is set, both pre- and post- need to be `None` or `True`/`False`. # After this code block, pre- and post- are guaranteed to be numeric. if padlength is None: for key in ["prepadlength", "postpadlength"]: if plengths[key] is not None: try: scalar_parser(plengths[key], varname=key, ntype=nt_dict[unit], lims=[0, np.inf]) except Exception as exc: raise exc else: plengths[key] = 0 else: try: scalar_parser(padlength, varname="padlength", ntype=nt_dict[unit], lims=[0, np.inf]) except Exception as exc: raise exc for key in ["prepadlength", "postpadlength"]: if not isinstance(plengths[key], (bool, type(None))): raise SPYTypeError(plengths[key], varname=key, expected="bool or None") if prepadlength is None and postpadlength is None: prepadlength = True postpadlength = True else: prepadlength = prepadlength is not None postpadlength = postpadlength is not None if prepadlength and postpadlength: plengths["prepadlength"] = padlength / 2 plengths["postpadlength"] = padlength / 2 else: plengths["prepadlength"] = prepadlength * padlength plengths["postpadlength"] = postpadlength * padlength # Under-determined: abort if requested padding length is 0 if all(value == 0 for value in plengths.values() if value is not None): lgl = "either non-zero value of `padlength` or `prepadlength` " + \ 
"and/or `postpadlength` to be set" raise SPYValueError(legal=lgl, varname="padlength", actual="0|None") else: # For absolute padding, the desired length has to be >= max. trial length if pad == "absolute": try: scalar_parser(padlength, varname="padlength", ntype=nt_dict[unit], lims=[padlim, np.inf]) except Exception as exc: raise exc for key in ["prepadlength", "postpadlength"]: if not isinstance(plengths[key], (bool, type(None))): raise SPYTypeError(plengths[key], varname=key, expected="bool or None") # For `maxlen` or `nextpow2` we don't want any numeric entries at all else: for key, value in plengths.items(): if not isinstance(value, (bool, type(None))): raise SPYTypeError(value, varname=key, expected="bool or None") # Warn of potential conflicts if padlength and (prepadlength or postpadlength): msg = "Found `padlength` and `prepadlength` and/or " +\ "`postpadlength`. Symmetric padding is performed. " SPYWarning(msg) # If both pre-/post- are `None`, set them to `True` to use symmetric # padding, otherwise convert `None` entries to `False` if prepadlength is None and postpadlength is None: plengths["prepadlength"] = True plengths["postpadlength"] = True else: plengths["prepadlength"] = plengths["prepadlength"] is not None plengths["postpadlength"] = plengths["postpadlength"] is not None # Update pre-/post-padding and (if required) convert time to samples prepadlength = plengths["prepadlength"] postpadlength = plengths["postpadlength"] if unit == "time": if pad == "relative": prepadlength = int(prepadlength * data.samplerate) postpadlength = int(postpadlength * data.samplerate) elif pad == "absolute": padlength = int(padlength * data.samplerate) # Construct dict of keywords for ``np.pad`` depending on chosen `padtype` kws = { "zero": { "mode": "constant", "constant_values": 0 }, "nan": { "mode": "constant", "constant_values": np.nan }, "localmean": { "mode": "mean", "stat_length": -1 }, "mean": { "mode": "mean" }, "edge": { "mode": "edge" }, "mirror": { "mode": 
"reflect" } } # If in put was syncopy data object, padding is done on a per-trial basis if spydata: # A list of input keywords for ``np.pad`` is constructed, no matter if # we actually want to build a new object or not pad_opts = [] for trl in data.trials: nSamples = trl.shape[timeAxis] if pad == "absolute": padding = (padlength - nSamples) / (prepadlength + postpadlength) elif pad == "relative": padding = True elif pad == "maxlen": padding = (maxTrialLen - nSamples) / (prepadlength + postpadlength) elif pad == "nextpow2": padding = (_nextpow2(nSamples) - nSamples) / (prepadlength + postpadlength) pw = np.zeros((2, 2), dtype=int) pw[timeAxis, :] = [prepadlength * padding, postpadlength * padding] pad_opts.append(dict({"pad_width": pw}, **kws[padtype])) if padtype == "localmean": pad_opts[-1]["stat_length"] = pw[timeAxis, :] if create_new: pass else: return pad_opts # Input was a array/FauxTrial (i.e., single trial) - we have to do the padding just once else: nSamples = data.shape[timeAxis] if pad == "absolute": padding = (padlength - nSamples) / (prepadlength + postpadlength) elif pad == "relative": padding = True elif pad == "nextpow2": padding = (_nextpow2(nSamples) - nSamples) / (prepadlength + postpadlength) pw = np.zeros((2, 2), dtype=int) pw[timeAxis, :] = [prepadlength * padding, postpadlength * padding] pad_opts = dict({"pad_width": pw}, **kws[padtype]) if padtype == "localmean": pad_opts["stat_length"] = pw[timeAxis, :] if create_new: if isinstance(data, np.ndarray): return np.pad(data, **pad_opts) else: # FIXME: currently only supports FauxTrial shp = list(data.shape) shp[timeAxis] += pw[timeAxis, :].sum() idx = list(data.idx) if isinstance(idx[timeAxis], slice): idx[timeAxis] = slice(idx[timeAxis].start, idx[timeAxis].start + shp[timeAxis]) else: idx[timeAxis] = pw[timeAxis, 0] * [idx[timeAxis][0]] + idx[timeAxis] \ + pw[timeAxis, 1] * [idx[timeAxis][-1]] return data.__class__(shp, idx, data.dtype, data.dimord) else: return pad_opts
def scalar_parser(var, varname="", ntype=None, lims=None):
    """
    Parse scalars

    Parameters
    ----------
    var : scalar
        Scalar quantity to verify
    varname : str
        Local variable name used in caller, see Examples for details.
    ntype : None or str
        Expected numerical type of `var`. Possible options include any valid
        builtin type name (e.g., `"int"`, `"float"`, `"complex"`) as well as
        `"int_like"` (`var` is expected to have no significant digits after
        its decimal point, e.g., 3.0, -12.0 etc.). Builtin type names are
        compared against the exact type of `var` (no subclass matching).
        If `ntype` is `None` the numerical type of `var` is not checked.
    lims : None or two-element list_like
        Lower (`lims[0]`) and upper (`lims[1]`) bounds for legal values of `var`.
        Note that the code checks for non-strict inequality, i.e.,
        `var = lims[0]` or `var = lims[1]` are both considered to be valid
        values of `var`. Using `lims = [-np.inf, np.inf]` may be employed to
        ensure that `var` is finite and non-NaN. For complex scalars
        bounds-checking is performed element-wise, that is both real and
        imaginary part of `var` have to be inside the bounds provided by
        `lims` (see Examples for details).
        If `lims` is `None` bounds-checking is not performed.

    Returns
    -------
    Nothing : None

    Raises
    ------
    SPYTypeError
        If `var` is not a number or its type does not match the builtin type
        named by `ntype`.
    SPYValueError
        If `var` is not integer-like although ``ntype = "int_like"``, or if
        `var` is non-finite or violates the bounds given by `lims`.
    AttributeError
        If `ntype` is neither `"int_like"` nor the name of a builtin.

    Examples
    --------
    Assume `freq` is supposed to be a scalar with integer-like values between
    10 and 1000. The following calls confirm the validity of `freq`

    >>> freq = 440
    >>> scalar_parser(freq, varname="freq", ntype="int_like", lims=[10, 1000])
    >>> freq = 440.0
    >>> scalar_parser(freq, varname="freq", ntype="int_like", lims=[10, 1000])

    Conversely, these values of `freq` yield errors

    >>> freq = 440.5    # not integer-like
    >>> scalar_parser(freq, varname="freq", ntype="int_like", lims=[10, 1000])
    >>> freq = 2        # outside bounds
    >>> scalar_parser(freq, varname="freq", ntype="int_like", lims=[10, 1000])
    >>> freq = '440'    # not a scalar
    >>> scalar_parser(freq, varname="freq", ntype="int_like", lims=[10, 1000])

    For complex scalars bounds-checking is performed element-wise on both
    real and imaginary part:

    >>> scalar_parser(complex(2,-1), lims=[-3, 5])  # valid
    >>> scalar_parser(complex(2,-1), lims=[-3, 1])  # invalid since real part is greater than 1

    See also
    --------
    array_parser : similar functionality for parsing array-like objects
    """

    # `__builtins__` is a CPython implementation detail: it is the builtins
    # *module* only in `__main__` but a plain dict in imported modules, so
    # attribute access on it is unreliable. Use the real module instead.
    import builtins

    # Make sure `var` is a scalar-like number
    if not isinstance(var, numbers.Number):
        raise SPYTypeError(var, varname=varname, expected="scalar")

    # If required, parse type ("int_like" is a bit of a special case here...)
    if ntype is not None:
        if ntype == "int_like":
            if np.round(var) != var:
                raise SPYValueError(ntype, varname=varname, actual=str(var))
        else:
            # Exact type comparison on purpose: e.g., `True` must not pass as "int"
            if type(var) is not getattr(builtins, ntype):
                raise SPYTypeError(var, varname=varname, expected=ntype)

    # If required perform bounds-check: transform scalar to NumPy array
    # to be able to handle complex scalars too
    if lims is not None:
        if isinstance(var, complex):
            val = np.array([var.real, var.imag])
            legal = "both real and imaginary part to be "
        else:
            val = np.array([var])
            legal = "value to be "
        # The finiteness test additionally rejects NaN, which passes both comparisons
        if np.any(val < lims[0]) or np.any(val > lims[1]) or not np.isfinite(var):
            legal += "greater or equals {lb:s} and less or equals {ub:s}"
            raise SPYValueError(legal.format(lb=str(lims[0]), ub=str(lims[1])),
                                varname=varname, actual=str(var))

    return
def io_parser(fs_loc, varname="", isfile=True, ext="", exists=True):
    """
    Parse file-system location strings for reading/writing files/directories

    Parameters
    ----------
    fs_loc : str
        String pointing to (hopefully valid) file-system location
        (absolute/relative path of file or directory ).
    varname : str
        Local variable name used in caller, see Examples for details.
    isfile : bool
        Indicates whether `fs_loc` points to a file (`isfile = True`) or
        directory (`isfile = False`)
    ext : str or 1darray-like
        Valid filename extension(s). Can be a single string (e.g., `ext = "lfp"`)
        or a list/1darray of valid extensions (e.g., `ext = ["lfp", "mua"]`).
        Extensions are matched exactly against the extension of `fs_loc`
        (leading dots in `ext` are ignored).
    exists : bool
        If `exists = True` ensure that file-system location specified by `fs_loc` exists
        (typically used when reading from `fs_loc`), otherwise
        (`exists = False`) check for already present conflicting files/directories
        (typically used when creating/writing to `fs_loc`).

    Returns
    -------
    fs_path : str
        Absolute path of `fs_loc`. If `isfile = True`, this is the directory
        part of the path including its trailing path separator.
    fs_name : str (only if `isfile = True`)
        Name (including extension) of input file (without path).

    Notes
    -----
    If extensions are provided together with `isfile = False`, a warning is
    issued and `None` is returned.

    Raises
    ------
    SPYTypeError
        If `fs_loc` is not a string.
    SPYIOError
        If `fs_loc` does not exist although `exists = True`, or exists
        although `exists = False`.
    SPYValueError
        If `fs_loc` is a file where a directory is expected (or vice versa),
        or if the filename extension is missing or not listed in `ext`.

    Examples
    --------
    To test whether `"/path/to/dataset.lfp"` points to an existing file, one
    might use

    >>> io_parser("/path/to/dataset.lfp")
    '/path/to/', 'dataset.lfp'

    The following call ensures that a folder called "mydata" can be safely
    created in the current working directory

    >>> io_parser("mydata", isfile=False, exists=False)
    '/path/to/cwd/mydata'

    Suppose a routine wants to save data to a file with potential
    extensions `".lfp"` or `".mua"`. The following call may be used to ensure
    the user input `dsetname = "relative/dir/dataset.mua"` is a valid choice:

    >>> abs_path, filename = io_parser(dsetname, varname="dsetname", ext=["lfp", "mua"], exists=False)
    >>> abs_path
    '/full/path/to/relative/dir/'
    >>> filename
    'dataset.mua'
    """

    # Start by resolving potential conflicts
    if not isfile and len(ext) > 0:
        msg = "filename extension(s) specified but `isfile = False`. Exiting..."
        SPYWarning(msg)
        return

    # Make sure `fs_loc` is actually a string
    if not isinstance(fs_loc, str):
        raise SPYTypeError(fs_loc, varname=varname, expected=str)

    # Avoid headaches, use absolute paths...
    fs_loc = os.path.abspath(os.path.expanduser(fs_loc))

    # Ensure that filesystem object does/does not exist
    if exists and not os.path.exists(fs_loc):
        raise SPYIOError(fs_loc, exists=False)
    if not exists and os.path.exists(fs_loc):
        raise SPYIOError(fs_loc, exists=True)

    # First, take care of directories...
    if not isfile:
        isdir = os.path.isdir(fs_loc)
        if (isdir and not exists):
            raise SPYIOError(fs_loc, exists=isdir)
        elif (not isdir and exists):
            raise SPYValueError(legal="directory", actual="file")
        else:
            return fs_loc

    # ...now files
    else:

        # Separate filename from its path
        file_name = os.path.basename(fs_loc)

        # If wanted, parse filename extension(s)
        if len(ext):

            # Extract filename extension and get rid of its dot
            file_ext = os.path.splitext(file_name)[1]
            file_ext = file_ext.replace(".", "")

            # Compare against the exact set of permitted extensions: a
            # substring test on `str(ext)` would wrongly accept fragments
            # such as "f" or "lf" for `ext = "lfp"`
            if isinstance(ext, str):
                candidates = [ext]
            else:
                candidates = [str(ex) for ex in ext]
            allowed = {cand.lstrip(".") for cand in candidates}

            # In here, having no extension counts as an error
            if len(file_ext) == 0 or file_ext not in allowed:
                if isinstance(ext, (list, np.ndarray)):
                    ext = "'" + "or '".join(ex + "' " for ex in ext)
                raise SPYValueError(ext, varname="filename-extension", actual=file_ext)

        # Now make sure file does or does not exist
        isfile = os.path.isfile(fs_loc)
        if (isfile and not exists):
            raise SPYIOError(fs_loc, exists=isfile)
        elif (not isfile and exists):
            raise SPYValueError(legal="file", actual="directory")
        else:
            # Strip the basename off the *end* of the path; splitting on the
            # name breaks if it also occurs inside the directory part
            return fs_loc[:len(fs_loc) - len(file_name)], file_name
def definetrial(obj, trialdefinition=None, pre=None, post=None, start=None,
                trigger=None, stop=None, clip_edges=False):
    """(Re-)define trials of a Syncopy data object

    Data can be structured into trials based on timestamps of a start, trigger
    and end events::

                    start    trigger    stop
        |---- pre ----|--------|---------|--- post----|

    Parameters
    ----------
    obj : Syncopy data object (:class:`BaseData`-like)
        Object whose trial layout is (re-)defined
    trialdefinition : :class:`EventData` object or Mx3 array
        [start, stop, trigger_offset] sample indices for `M` trials
    pre : float
        offset time (s) before start event
    post : float
        offset time (s) after end event
    start : int
        event code (id) to be used for start of trial
    stop : int
        event code (id) to be used for end of trial
    trigger :
        event code (id) to be used center (t=0) of trial
    clip_edges : bool
        trim trials to actual data-boundaries.

    Returns
    -------
    Nothing : None
        The trial definition is written to `obj` in place (via its
        `_trialdefinition` attribute); log entries are updated accordingly.

    Raises
    ------
    SPYValueError
        If `obj` is empty, required sampling rates are `None`, the keyword
        combination is under-determined or inconsistent, a requested event
        code cannot be found, or the resulting trials are degenerate.
    SPYTypeError
        If `clip_edges` is not Boolean (only checked if event keywords are
        provided).

    Notes
    -----
    :func:`definetrial` supports the following argument combinations:

    >>> # define M trials based on [start, end, offset] indices
    >>> definetrial(obj, trialdefinition=[M x 3] array)

    >>> # define trials based on event codes stored in <:class:`EventData` object>
    >>> definetrial(obj, trialdefinition=<EventData object>,
                    pre=0, post=0, start=startCode, stop=stopCode,
                    trigger=triggerCode)

    >>> # apply same trial definition as defined in <:class:`EventData` object>
    >>> definetrial(<AnalogData object>,
                    trialdefinition=<EventData object w/sampleinfo/t0/trialinfo>)

    >>> # define whole recording as single trial
    >>> definetrial(obj, trialdefinition=None)
    """

    # Start by vetting input object
    data_parser(obj, varname="obj")
    if obj.data is None:
        lgl = "non-empty Syncopy data object"
        act = "empty Syncopy data object"
        raise SPYValueError(legal=lgl, varname="obj", actual=act)

    # Check array/object holding trial specifications
    if trialdefinition is not None:
        if trialdefinition.__class__.__name__ == "EventData":
            data_parser(trialdefinition, varname="trialdefinition",
                        writable=None, empty=False)
            evt = True
            # NOTE(review): `ref`, `tgt` and `trl` are only assigned further
            # below for AnalogData targets or if event keywords are given -
            # other combinations reach the final assignment unbound; confirm
            # whether these should be rejected explicitly.
        else:
            array_parser(trialdefinition, varname="trialdefinition", dims=2)

            # Sample bounds can only be checked against continuous data
            if any(["ContinuousData" in str(base) for base in obj.__class__.__mro__]):
                scount = obj.data.shape[obj.dimord.index("time")]
            else:
                scount = np.inf
            array_parser(trialdefinition[:, :2], varname="sampleinfo", dims=(None, 2),
                         hasnan=False, hasinf=False, ntype="int_like", lims=[0, scount])

            trl = np.array(trialdefinition, dtype="float")
            ref = obj
            tgt = obj
            evt = False
    else:
        # Construct object-class-specific `trl` arrays treating data-set as single trial
        if any(["ContinuousData" in str(base) for base in obj.__class__.__mro__]):
            trl = np.array([[0, obj.data.shape[obj.dimord.index("time")], 0]])
        else:
            samplecol = obj.dimord.index("sample")
            trl = np.array([[np.nanmin(obj.data[:, samplecol]),
                             np.nanmax(obj.data[:, samplecol]), 0]])
        ref = obj
        tgt = obj
        evt = False

    # AnalogData + EventData w/sampleinfo
    if obj.__class__.__name__ == "AnalogData" and evt and trialdefinition.sampleinfo is not None:
        if obj.samplerate is None or trialdefinition.samplerate is None:
            lgl = "non-`None` value - make sure `samplerate` is set before defining trials"
            act = "None"
            raise SPYValueError(legal=lgl, varname="samplerate", actual=act)
        ref = trialdefinition
        tgt = obj
        trl = np.array(ref.trialinfo)
        t0 = np.array(ref._t0).reshape((ref._t0.size, 1))
        trl = np.hstack([ref.sampleinfo, t0, trl])
        # Re-scale sample numbers from the event clock to the target's clock
        trl = np.round((trl / ref.samplerate) * tgt.samplerate).astype(int)

    # AnalogData + EventData w/keywords or just EventData w/keywords
    if any([kw is not None for kw in [pre, post, start, trigger, stop]]):

        # Make sure we actually have valid data objects to work with
        if obj.__class__.__name__ == "EventData" and evt is False:
            ref = obj
            tgt = obj
        elif obj.__class__.__name__ == "AnalogData" and evt is True:
            ref = trialdefinition
            tgt = obj
        else:
            lgl = "AnalogData with associated EventData object"
            act = "{} and {}".format(obj.__class__.__name__,
                                     trialdefinition.__class__.__name__)
            raise SPYValueError(legal=lgl, actual=act, varname="input")

        # The only case we might actually need it: ensure `clip_edges` is valid
        if not isinstance(clip_edges, bool):
            raise SPYTypeError(clip_edges, varname="clip_edges", expected="Boolean")

        # Ensure that objects have their sampling-rates set, otherwise break
        if ref.samplerate is None or tgt.samplerate is None:
            lgl = "non-`None` value - make sure `samplerate` is set before defining trials"
            act = "None"
            raise SPYValueError(legal=lgl, varname="samplerate", actual=act)

        # Get input dimensions
        szin = []
        for var in [pre, post, start, trigger, stop]:
            if isinstance(var, (np.ndarray, list)):
                szin.append(len(var))
        if np.unique(szin).size > 1:
            lgl = "all trial-related arrays to have the same length"
            act = "arrays with sizes {}".format(
                str(np.unique(szin)).replace("[", "").replace("]", ""))
            raise SPYValueError(legal=lgl, varname="trial-keywords", actual=act)
        if len(szin):
            ntrials = szin[0]
            ninc = 1
        else:
            # Scalar keywords only: re-use entry 0 for every trial found
            ntrials = 1
            ninc = 0

        # If both `pre` and `start` or `post` and `stop` are `None`, abort
        if (pre is None and start is None) or (post is None and stop is None):
            lgl = "`pre` or `start` and `post` or `stop` to be not `None`"
            act = "both `pre` and `start` and/or `post` and `stop` are simultaneously `None`"
            raise SPYValueError(legal=lgl, actual=act)
        if (trigger is None) and (pre is not None or post is not None):
            lgl = "non-None `trigger` with `pre`/`post` timing information"
            act = "`trigger` = `None`"
            raise SPYValueError(legal=lgl, actual=act)

        # If provided, ensure keywords make sense, otherwise allocate defaults
        kwrds = {}
        vdict = {"pre": {"var": pre, "hasnan": False, "ntype": None, "fillvalue": 0},
                 "post": {"var": post, "hasnan": False, "ntype": None, "fillvalue": 0},
                 "start": {"var": start, "hasnan": None, "ntype": "int_like", "fillvalue": np.nan},
                 "trigger": {"var": trigger, "hasnan": None, "ntype": "int_like", "fillvalue": np.nan},
                 "stop": {"var": stop, "hasnan": None, "ntype": "int_like", "fillvalue": np.nan}}
        for vname, opts in vdict.items():
            if opts["var"] is not None:
                if isinstance(opts["var"], numbers.Number):
                    scalar_parser(opts["var"], varname=vname, ntype=opts["ntype"],
                                  lims=[-np.inf, np.inf])
                    opts["var"] = np.full((ntrials,), opts["var"])
                else:
                    array_parser(opts["var"], varname=vname, hasinf=False,
                                 hasnan=opts["hasnan"], ntype=opts["ntype"],
                                 dims=(ntrials,))
                kwrds[vname] = opts["var"]
            else:
                kwrds[vname] = np.full((ntrials,), opts["fillvalue"])

        # Prepare `trl` and convert event-codes + sample-numbers to lists
        trl = []
        evtid = list(ref.data[:, ref.dimord.index("eventid")])
        evtsp = list(ref.data[:, ref.dimord.index("sample")])
        nevents = len(evtid)
        searching = True
        trialno = 0
        cnt = 0
        act = ""

        # Do this line-by-line: halt on error (if event-id is not found in `ref`)
        while searching:

            # Allocate begin and end of trial; `sidx` is reset every round so
            # that a missing `start` code never re-uses a stale list position
            begin = None
            end = None
            t0 = 0
            idxl = []
            sidx = None

            # First, try to assign `start`, then `t0`
            if not np.isnan(kwrds["start"][trialno]):
                try:
                    sidx = evtid.index(kwrds["start"][trialno])
                except ValueError:
                    act = str(kwrds["start"][trialno])
                    vname = "start"
                    break
                begin = evtsp[sidx] / ref.samplerate
                # Consumed events are overwritten with an impossible code
                evtid[sidx] = -np.pi
                idxl.append(sidx)

            if not np.isnan(kwrds["trigger"][trialno]):
                try:
                    idx = evtid.index(kwrds["trigger"][trialno])
                except ValueError:
                    act = str(kwrds["trigger"][trialno])
                    vname = "trigger"
                    break
                t0 = evtsp[idx] / ref.samplerate
                evtid[idx] = -np.pi
                idxl.append(idx)

            # Trial-begin is either `trigger - pre` or `start - pre`
            if begin is not None:
                begin -= kwrds["pre"][trialno]
            else:
                begin = t0 - kwrds["pre"][trialno]

            # Try to assign `stop`, if we got nothing, use `t0 + post`
            if not np.isnan(kwrds["stop"][trialno]):
                # Blank out everything preceding the start event so that the
                # stop code is only searched for after trial onset
                if sidx is not None:
                    evtid[:sidx] = [np.pi] * sidx
                try:
                    idx = evtid.index(kwrds["stop"][trialno])
                except ValueError:
                    act = str(kwrds["stop"][trialno])
                    vname = "stop"
                    break
                end = evtsp[idx] / ref.samplerate + kwrds["post"][trialno]
                evtid[idx] = -np.pi
                idxl.append(idx)
            else:
                end = t0 + kwrds["post"][trialno]

            # Off-set `t0`
            t0 -= begin

            # Make sure current trial setup makes (some) sense
            if begin >= end:
                lgl = "non-overlapping trial begin-/end-samples"
                act = "trial-begin at {}, trial-end at {}".format(str(begin), str(end))
                raise SPYValueError(legal=lgl, actual=act)

            # Finally, write line of `trl`
            trl.append([begin, end, t0])

            # Update counters and end this mess when we're done: drop all
            # events up to (and including) the last one used by this trial
            trialno += ninc
            cnt += 1
            evtsp = evtsp[max(idxl, default=-1) + 1:]
            evtid = evtid[max(idxl, default=-1) + 1:]
            if trialno == ntrials or cnt == nevents:
                searching = False

        # Abort if the above loop ran into troubles
        if len(trl) < ntrials:
            if len(act) > 0:
                raise SPYValueError(legal="existing event-id", varname=vname, actual=act)

        # Make `trl` a NumPy array (seconds -> samples of the target object)
        trl = np.round(np.array(trl) * tgt.samplerate).astype(int)

    # If appropriate, clip `trl` to AnalogData object's bounds (if wanted)
    if clip_edges and evt:
        msk = trl[:, 0] < 0
        trl[msk, 0] = 0
        dmax = tgt.data.shape[tgt.dimord.index("time")]
        msk = trl[:, 1] > dmax
        trl[msk, 1] = dmax
        if np.any(trl[:, 0] >= trl[:, 1]):
            lgl = "non-overlapping trials"
            act = "some trials are overlapping after clipping to AnalogData object range"
            raise SPYValueError(legal=lgl, actual=act)

    # The triplet `sampleinfo`, `t0` and `trialinfo` works identically for
    # all data genres
    if trl.shape[1] < 3:
        raise SPYValueError("array of shape (no. of trials, 3+)",
                            varname="trialdefinition",
                            actual="shape = {shp:s}".format(shp=str(trl.shape)))

    # Finally: assign `sampleinfo`, `t0` and `trialinfo` (and potentially `trialid`)
    tgt._trialdefinition = trl

    # In the discrete case, we have some additional work to do
    if any(["DiscreteData" in str(base) for base in tgt.__class__.__mro__]):

        # Compute trial-IDs by matching data samples with provided trial-bounds
        samples = tgt.data[:, tgt.dimord.index("sample")]
        starts = tgt.sampleinfo[:, 0]
        ends = tgt.sampleinfo[:, 1]
        startids = np.searchsorted(starts, samples, side="right")
        endids = np.searchsorted(ends, samples, side="left")
        mask = startids == endids
        startids -= 1

        # Samples not belonging into any trial get a trial-ID of -1
        startids[mask] = int(startids.min() <= 0) * (-1)
        tgt.trialid = startids

    # Write log entry (identity, not equality, decides whether one or two
    # objects are involved)
    if ref is tgt:
        ref.log = "updated trial-definition with [" \
                  + " x ".join([str(numel) for numel in trl.shape]) \
                  + "] element array"
    else:
        ref_log = ref._log.replace("\n\n", "\n\t")
        tgt.log = "trial-definition extracted from EventData object: "
        tgt._log += ref_log
        tgt.cfg = {"method": sys._getframe().f_code.co_name,
                   "EventData object": ref.cfg}
        ref.log = "updated trial-defnition of {} object".format(tgt.__class__.__name__)

    return
def csd(trl_dat, samplerate=1, nSamples=None, taper="hann", taper_opt=None,
        norm=False, fullOutput=False):
    """
    Single trial Fourier cross spectral estimates between all channels
    of the input data. First all the individual Fourier transforms
    are calculated via a (multi-)tapered FFT, then the pairwise
    cross-spectra are computed.

    Averaging over tapers is done implicitly for multi-taper analysis with
    `taper="dpss"`.

    Output consists of all ``nChannels * (nChannels + 1) / 2`` different
    complex estimates arranged in a symmetric fashion (``CS_ij == CS_ji*``).
    The elements on the main diagonal (`CS_ii`) are the (real) auto-spectra.

    This is NOT the same as what is commonly referred to as
    "cross spectral density" as there is no (time) averaging!!
    Multi-tapering alone is not necessarily sufficient to get enough
    statistical power for a robust csd estimate. Yet for completeness
    and testing the option ``norm = True`` returns a single-trial
    coherence estimate for ``taper = "dpss"``.

    Parameters
    ----------
    trl_dat : (N, K) :class:`numpy.ndarray`
        Uniformly sampled multi-channel time-series data
        The 1st dimension is interpreted as the time axis,
        columns represent individual channels.
    samplerate : float
        Samplerate in Hz
    nSamples : int or None
        Absolute length of the (potentially to be padded) signals
        or `None` for no padding (`N` is the number of samples)
    taper : str or None
        Taper function to use, one of :module:`scipy.signal.windows`
        Set to `None` for no tapering.
    taper_opt : dict, optional
        Additional keyword arguments passed to the `taper` function.
        For multi-tapering with ``taper = 'dpss'`` set the keys
        `'Kmax'` and `'NW'`.
        For further details, please refer to the
        `SciPy docs <https://docs.scipy.org/doc/scipy/reference/signal.windows.html>`_
    norm : bool, optional
        Set to `True` to normalize for a single-trial coherence measure.
        Only meaningful in a multi-taper (``taper = "dpss"``) setup and if no
        additional (trial-)averaging is performed afterwards.
    fullOutput : bool
        For backend testing or stand-alone applications, set to `True`
        to return also the `freqs` array.

    Returns
    -------
    CS_ij : (nFreq, K, K) :class:`numpy.ndarray`
        Complex cross spectra for all channel combinations ``i,j``.
        `K` corresponds to number of input channels.
    freqs : (nFreq,) :class:`numpy.ndarray`
        The Fourier frequencies if ``fullOutput = True``

    See also
    --------
    normalize_csd : :func:`~syncopy.connectivity.csd.normalize_csd`
             Coherence from trial averages
    mtmfft : :func:`~syncopy.specest.mtmfft.mtmfft`
             (Multi-)tapered Fourier analysis
    """

    # Tapered Fourier transforms, laid out as (nTapers x nFreq x nChannels)
    spectra, freqs = mtmfft(trl_dat, samplerate, nSamples, taper, taper_opt)

    # Channel-wise outer product: (nTapers x nFreq x nChannels x nChannels)
    plain = spectra[:, :, np.newaxis, :]
    conjugated = spectra[:, :, :, np.newaxis].conj()

    # Collapse the taper axis and exchange the two channel axes, which gives
    # the final (nFreq x nChannels x nChannels) arrangement right away
    cross = (plain * conjugated).mean(axis=0).swapaxes(1, 2)

    if norm:
        # only meaningful for multi-tapering
        if taper != 'dpss':
            msg = "Normalization of single trial csd only possible with taper='dpss'"
            raise SPYValueError(legal=msg, varname="taper", actual=taper)

        # Auto-spectra per frequency: (nFreq x nChannels)
        auto = cross.diagonal(axis1=1, axis2=2)
        # Divide each cross-spectrum by the geometric mean of the two
        # auto-spectra involved
        cross = cross / np.sqrt(auto[:, :, np.newaxis] * auto[:, np.newaxis, :])

    if fullOutput:
        return cross, freqs
    return cross
def _prep_analog_plots(self, name, **inputArgs):
    """
    Local helper that performs sanity checks and sets up data selection

    Parameters
    ----------
    self : :class:`~syncopy.AnalogData` object
        Syncopy :class:`~syncopy.AnalogData` object that is being processed by
        the respective :meth:`.singlepanelplot` or :meth:`.multipanelplot` class methods
        defined in this module.
    name : str
        Name of caller (i.e., "singlepanelplot" or "multipanelplot")
    inputArgs : dict
        Input arguments of caller (i.e., :meth:`.singlepanelplot` or :meth:`.multipanelplot`)
        collected in dictionary. The keys `"avg_channels"`, `"trials"`,
        `"channels"`, `"toilim"`, `"title"` and `"grid"` are required,
        `"avg_trials"` is optional.

    Returns
    -------
    dimArrs : tuple
        Tuple containing (in this order) `trList`, list of (selected) trials to
        visualize and `chArr`, 1D :class:`numpy.ndarray` of channel specifiers
        based on provided user selection. Note that `"all"` and `None`
        selections are converted to arrays ready for indexing.
    dimCounts : tuple
        Tuple holding sizes of corresponding selection arrays comprised
        in `dimArrs`. Elements are `nTrials`, number of (selected) trials and
        `nChan`, number of (selected) channels.
    idx : list
        Two element indexing list (respecting non-default `dimord`s) intended
        for use with trial-array data. The channel entry holds the channel
        selection, the time entry is left as ``slice(None)``.
    timeIdx : int
        Position of time-axis within indexing list `idx` (either 0 or 1).
    chanIdx : int
        Position of channel-axis within indexing list `idx` (either 0 or 1).

    Raises
    ------
    SPYTypeError
        If `avg_channels`, `avg_trials` or `grid` are not boolean or `title`
        is not a string.
    SPYValueError
        If `toilim` is provided but `trials` is `None`.

    Notes
    -----
    All checks that depend on `inputArgs` alone are performed *before* the
    in-place selection is attached to `self`, so that a rejected call does not
    leave a selection behind on the object.

    This is an auxiliary method that is intended purely for internal use. Please
    refer to the user-exposed methods :func:`~syncopy.singlepanelplot` and/or
    :func:`~syncopy.multipanelplot` to actually generate plots of Syncopy data objects.

    See also
    --------
    :meth:`syncopy.plotting.spy_plotting._prep_plots` : General basic input parsing for all Syncopy plotting routines
    """

    # Basic sanity checks for all plotting routines w/any Syncopy object
    _prep_plots(self, name, **inputArgs)

    # Ensure our binary flags are actually binary
    if not isinstance(inputArgs["avg_channels"], bool):
        raise SPYTypeError(inputArgs["avg_channels"], varname="avg_channels", expected="bool")
    if not isinstance(inputArgs.get("avg_trials", True), bool):
        raise SPYTypeError(inputArgs["avg_trials"], varname="avg_trials", expected="bool")

    # Ensure any optional keywords controlling plotting appearance make sense
    title = inputArgs["title"]
    if title is not None and not isinstance(title, str):
        raise SPYTypeError(title, varname="title", expected="str")
    grid = inputArgs["grid"]
    if grid is not None and not isinstance(grid, bool):
        raise SPYTypeError(grid, varname="grid", expected="bool")

    # Timing selections require trials to select from
    rawPlot = inputArgs["trials"] is None
    if rawPlot and inputArgs["toilim"] is not None:
        lgl = "`trials` to be not `None` to perform timing selection"
        act = "`toilim` was provided but `trials` is `None`"
        raise SPYValueError(legal=lgl, varname="trials/toilim", actual=act)

    # Only now pass provided selections on to `Selector` class which performs
    # error checking and generates required indexing arrays
    self._selection = {"trials": inputArgs["trials"],
                       "channels": inputArgs["channels"],
                       "toilim": inputArgs["toilim"]}

    # Get trial and channel counts
    trList = [] if rawPlot else self._selection.trials
    nTrials = len(trList)
    chArr = self.channel[self._selection.channel]
    nChan = chArr.size

    # Collect arrays and counts in tuples
    dimCounts = (nTrials, nChan)
    dimArrs = (trList, chArr)

    # Prepare indexing list respecting potential non-default `dimord`s
    idx = [slice(None), slice(None)]
    chanIdx = self.dimord.index("channel")
    timeIdx = self.dimord.index("time")
    idx[chanIdx] = self._selection.channel

    return dimArrs, dimCounts, idx, timeIdx, chanIdx
def best_match(source, selection, span=False, tol=None, squash_duplicates=False):
    """
    Find matching elements in a given 1d-array/list

    Parameters
    ----------
    source : NumPy 1d-array/list
        Reference array whose elements are to be matched by `selection`.
        Any non-array sequence is converted to a NumPy array.
    selection: NumPy 1d-array/list
        Array of query-values whose closest matches are to be found in `source`.
        Note that `source` and `selection` need not be the same length.
    span : bool
        If `True`, `selection` is interpreted as (closed) interval ``[lo, hi]`` and
        `source` is queried for all elements contained in the interval, i.e.,
        ``lo <= src <= hi for src in source`` (typically used for
        `toilim`/`foilim`-like selections).
    tol : None or float
        If `None` for each component of `selection` the closest value in `source`
        is selected, e.g., for ``source = [10, 20]`` and ``selection = [-50, 0, 50]``
        the closest values are `[10, 10, 20]`. If not `None`, ensures values in
        `selection` do not deviate further than `tol` from `source`. If any
        element `sel` of `selection` is outside a `tol`-neighborhood around `source`,
        i.e., ``np.abs(sel - source).max() >= tol``,
        a :class:`~syncopy.shared.errors.SPYValueError` is raised.
    squash_duplicates : bool
        If `True`, identical matches are removed from the result.

    Returns
    -------
    values : NumPy 1darray
        Values of `source` that most closely match given elements in `selection`
    idx : NumPy 1darray
        Indices of `values` with respect to `source`, such that, ``source[idx] == values``

    Raises
    ------
    SPYValueError
        If `tol` is given and the tolerance criterion described above is violated.

    Notes
    -----
    This is an auxiliary method that is intended purely for internal use. Thus,
    no error checking is performed. A `source` holding just a single element is
    treated as (trivially) sorted.

    Examples
    --------
    Exact matching, ordered `source` and `selection`:

    >>> best_match(np.arange(10), [2,5])
    (array([2, 5]), array([2, 5]))

    Inexact matching, ordered `source` and `selection`:

    >>> source = np.arange(10)
    >>> selection = np.array([1.5, 1.5, 2.2, 6.2, 8.8])
    >>> best_match(source, selection)
    (array([2, 2, 2, 6, 9]), array([2, 2, 2, 6, 9]))

    Inexact matching, unordered `source` and `selection`:

    >>> source = np.array([2.2, 1.5, 1.5, 6.2, 8.8])
    >>> selection = np.array([1.9, 9., 1., -0.4, 1.2, 0.2, 9.3])
    >>> best_match(source, selection)
    (array([2.2, 8.8, 1.5, 1.5, 1.5, 1.5, 8.8]), array([0, 4, 1, 1, 1, 1, 4]))

    Same as above, but ignore duplicate matches

    >>> best_match(source, selection, squash_duplicates=True)
    (array([2.2, 8.8, 1.5]), array([0, 4, 1]))

    Interval-matching:

    >>> best_match(np.arange(10), [2.9, 6.1], span=True)
    (array([3, 4, 5, 6]), array([3, 4, 5, 6]))
    """

    # Make `source` a NumPy array if necessary (lists, tuples, ...)
    if not isinstance(source, np.ndarray):
        source = np.array(source)

    # Ensure selection is within `tol` bounds from `source`
    if tol is not None:
        if not all(np.all(np.abs(source - value) < tol) for value in selection):
            lgl = "all elements of `selection` to be within a {0:2.4f}-band around `source`"
            act = "values in `selection` deviating further than given tolerance " +\
                "of {0:2.4f} from source"
            raise SPYValueError(legal=lgl.format(tol),
                                varname="selection",
                                actual=act.format(tol))

    # Only discrete selections may require sorting `source`
    issorted = True

    # Interval-selections are a lot easier than discrete time-points...
    if span:
        idx = np.intersect1d(np.where(source >= selection[0])[0],
                             np.where(source <= selection[1])[0])
    else:
        # Do not perform potentially unnecessary sort operations; the size
        # guard keeps single-element sources from tripping `min()` on the
        # empty array returned by `np.diff`
        if source.size > 1 and np.diff(source).min() < 0:
            issorted = False
            orig = np.array(source, copy=True)
            idx_orig = np.argsort(orig)
            source = orig[idx_orig]

        # Insertion points, then decide between left and right neighbor
        # (indices are clipped to stay within `source`)
        idx = np.searchsorted(source, selection, side="left")
        leftNbrs = np.abs(selection - source[np.maximum(idx - 1, 0)])
        rightNbrs = np.abs(selection - source[np.minimum(idx, source.size - 1)])
        shiftLeft = (idx == source.size) | (leftNbrs < rightNbrs)
        idx[shiftLeft] -= 1

    # Account for potentially unsorted selections (and thus unordered `idx`):
    # keep first occurrences in their original order
    if squash_duplicates:
        _, xdi = np.unique(idx.astype(np.intp), return_index=True)
        idx = idx[np.sort(xdi)]

    # Re-order discrete-selection index arrays in case `source` was unsorted
    if not issorted and not span:
        idx_sort = idx_orig[idx]
        return orig[idx_sort], idx_sort
    return source[idx], idx
def connectivityanalysis(data, method="coh", keeptrials=False, output="abs",
                         foi=None, foilim=None, pad_to_length=None,
                         polyremoval=None, taper="hann", tapsmofrq=None,
                         nTaper=None, out=None, **kwargs):
    """
    Perform connectivity analysis of Syncopy :class:`~syncopy.AnalogData` objects

    **Usage Summary**

    Options available in all analysis methods:

    * **foi**/**foilim** : frequencies of interest; either array of frequencies or
      frequency window (not both)
    * **polyremoval** : de-trending method to use (0 = mean, 1 = linear or `None`)

    List of available analysis methods and respective distinct options:

    "coh" : (Multi-) tapered coherency estimate
        Compute the normalized cross spectral densities
        between all channel combinations

        * **output** : one of ('abs', 'pow', 'fourier')
        * **taper** : one of :data:`~syncopy.shared.const_def.availableTapers`
        * **tapsmofrq** : spectral smoothing box for slepian tapers (in Hz)
        * **nTaper** : (optional) number of orthogonal tapers for slepian tapers
        * **pad_to_length**: either pad to an absolute length or set to `'nextpow2'`

    "corr" : Cross-correlations
        Computes the one sided (positive lags) cross-correlations
        between all channel combinations. The maximal lag is half
        the trial lengths.

        * **keeptrials** : set to `True` for single trial cross-correlations

    "granger" : Spectral Granger-Geweke causality
        Computes linear causality estimates between
        all channel combinations. The intermediate cross-spectral
        densities can be computed via multi-tapering.

        * **taper** : one of :data:`~syncopy.shared.const_def.availableTapers`
        * **tapsmofrq** : spectral smoothing box for slepian tapers (in Hz)
        * **nTaper** : (optional, not recommended) number of slepian tapers
        * **pad_to_length**: either pad to an absolute length or set to `'nextpow2'`

    Parameters
    ----------
    data : `~syncopy.AnalogData`
        A non-empty Syncopy :class:`~syncopy.datatype.AnalogData` object
    method : str
        Connectivity estimation method, one of 'coh', 'corr', 'granger'
    output : str
        Relevant for cross-spectral density estimation (`method='coh'`)
        Use `'pow'` for absolute squared coherence, `'abs'` for absolute value of coherence
        and `'fourier'` for the complex valued coherency.
    keeptrials : bool
        Relevant for cross-correlations (`method='corr'`).
        If `True` single-trial cross-correlations are returned. Must be
        `False` for `method='coh'` and `method='granger'`.
    foi : array-like or None
        Frequencies of interest (Hz) for output. If desired frequencies cannot be
        matched exactly, the closest possible frequencies are used. If `foi` is `None`
        or ``foi = "all"``, all attainable frequencies (i.e., zero to the
        Nyquist frequency ``samplerate / 2``) are selected.
    foilim : array-like (floats [fmin, fmax]) or None or "all"
        Frequency-window ``[fmin, fmax]`` (in Hz) of interest. The
        `foi` array will be constructed in 1Hz steps from `fmin` to
        `fmax` (inclusive).
    pad_to_length : int, None or 'nextpow2'
        Padding of the (tapered) signal, if set to a number pads all trials
        to this absolute length. E.g. `pad_to_length=2000` pads all
        trials to 2000 samples, if and only if the longest trial is
        at maximum 2000 samples.
        Alternatively if all trials have the same initial lengths
        setting `pad_to_length='nextpow2'` pads all trials to
        the next power of two.
        If `None` and trials have unequal lengths all trials are padded to match
        the longest trial. Not allowed for `method='corr'`.
    polyremoval : int or None
        De-trending to apply (0 = mean, 1 = linear) or `None`
    taper : str
        Only valid if `method` is `'coh'` or `'granger'`.
        Windowing function, one of :data:`~syncopy.specest.const_def.availableTapers`
    tapsmofrq : float
        Only valid if `method` is `'coh'` or `'granger'` and `taper` is `'dpss'`.
        The amount of spectral smoothing through  multi-tapering (Hz).
        Note that smoothing frequency specifications are one-sided,
        i.e., 4 Hz smoothing means plus-minus 4 Hz, i.e., a 8 Hz smoothing box.
    nTaper : int or None
        Only valid if `method` is `'coh'` or `'granger'` and ``taper = 'dpss'``.
        Number of orthogonal tapers to use. It is not recommended to set the number
        of tapers manually! Leave at `None` for the optimal number to be set automatically.
    out : None or :class:`~syncopy.CrossSpectralData` object
        If provided, an empty and writable
        :class:`~syncopy.CrossSpectralData` object with matching `dimord`
        that receives the trial-averaged result. Ignored (with a warning) for
        single-trial cross-correlations.
    **kwargs
        Additional keywords; `parallel` is forwarded to the single-trial
        computational routine.

    Returns
    -------
    out : :class:`~syncopy.CrossSpectralData` or None
        Single-trial results for ``method = 'corr'`` with `keeptrials` set,
        otherwise the trial-averaged result. `None` is returned if the result
        was written to a user-provided `out` object.

    Examples
    --------
    Coming soon...
    """

    # Make sure our one mandatory input object can be processed
    data_parser(data, varname="data", dataclass="AnalogData",
                writable=None, empty=False)
    timeAxis = data.dimord.index("time")

    # Get everything of interest in local namespace
    defaults = get_defaults(connectivityanalysis)
    lcls = locals()
    # check for ineffective additional kwargs
    check_passed_kwargs(lcls, defaults, frontend_name="connectivity")

    # Ensure a valid computational method was selected
    if method not in availableMethods:
        lgl = "'" + "or '".join(opt + "' " for opt in availableMethods)
        raise SPYValueError(legal=lgl, varname="method", actual=method)

    # if a subset selection is present
    # get sampleinfo and check for equidistancy
    if data._selection is not None:
        sinfo = data._selection.trialdefinition[:, :2]
        # user picked discrete set of time points
        if isinstance(data._selection.time[0], list):
            lgl = "equidistant time points (toi) or time slice (toilim)"
            actual = "non-equidistant set of time points"
            raise SPYValueError(legal=lgl, varname="select", actual=actual)
    else:
        sinfo = data.sampleinfo
    lenTrials = np.diff(sinfo).squeeze()

    # check polyremoval
    if polyremoval is not None:
        scalar_parser(polyremoval, varname="polyremoval", ntype="int_like", lims=[0, 1])

    # --- Padding ---

    if method == "corr" and pad_to_length:
        lgl = "`None`, no padding needed/allowed for cross-correlations"
        actual = f"{pad_to_length}"
        raise SPYValueError(legal=lgl, varname="pad_to_length", actual=actual)

    # the actual number of samples in case of later padding
    nSamples = validate_padding(pad_to_length, lenTrials)

    # --- Basic foi sanitization ---

    foi, foilim = validate_foi(foi, foilim, data.samplerate)

    # only now set foi array for foilim in 1Hz steps; from here on a
    # provided `foilim` always implies a non-`None` `foi`
    if foilim is not None:
        foi = np.arange(foilim[0], foilim[1] + 1, dtype=float)

    # Prepare keyword dict for logging
    log_dict = {"method": method,
                "output": output,
                "keeptrials": keeptrials,
                "polyremoval": polyremoval,
                "pad_to_length": pad_to_length}

    # --- Setting up specific Methods ---

    if method in ['coh', 'granger']:

        # --- set up computation of the single trial CSDs ---

        if keeptrials is not False:
            lgl = "False, trial averaging needed!"
            act = keeptrials
            raise SPYValueError(lgl, varname="keeptrials", actual=act)

        # Construct array of maximally attainable frequencies
        freqs = np.fft.rfftfreq(nSamples, 1 / data.samplerate)

        # Match desired frequencies as close as possible to
        # actually attainable freqs
        # these are the frequencies attached to the SpectralData by the CR!
        if foi is not None:
            foi, _ = best_match(freqs, foi, squash_duplicates=True)
        else:
            # neither `foi` nor `foilim` given: use all attainable frequencies
            msg = (f"Setting frequencies of interest to {freqs[0]:.1f}-"
                   f"{freqs[-1]:.1f}Hz")
            SPYInfo(msg)
            foi = freqs

        # sanitize taper selection and retrieve dpss settings
        taper_opt = validate_taper(taper,
                                   tapsmofrq,
                                   nTaper,
                                   keeptapers=False,  # ST_CSD's always average tapers
                                   foimax=foi.max(),
                                   samplerate=data.samplerate,
                                   nSamples=nSamples,
                                   output="pow")  # ST_CSD's always have this unit/norm

        log_dict["foi"] = foi
        log_dict["taper"] = taper
        # only dpss returns non-empty taper_opt dict
        if taper_opt:
            log_dict["nTaper"] = taper_opt["Kmax"]
            log_dict["tapsmofrq"] = tapsmofrq

        check_effective_parameters(ST_CrossSpectra, defaults, lcls)

        # parallel computation over trials
        st_compRoutine = ST_CrossSpectra(samplerate=data.samplerate,
                                         nSamples=nSamples,
                                         taper=taper,
                                         taper_opt=taper_opt,
                                         polyremoval=polyremoval,
                                         timeAxis=timeAxis,
                                         foi=foi)
        # hard coded as class attribute
        st_dimord = ST_CrossSpectra.dimord

    if method == 'coh':
        # final normalization after trial averaging
        av_compRoutine = NormalizeCrossSpectra(output=output)

    if method == 'granger':
        # after trial averaging
        # hardcoded numerical parameters
        av_compRoutine = GrangerCausality(rtol=1e-8,
                                          nIter=100,
                                          cond_max=1e4)

    if method == 'corr':
        # `lcls` still holds the `foi` value as provided by the caller
        if lcls['foi'] is not None:
            msg = 'Parameter `foi` has no effect for `corr`'
            SPYWarning(msg)
        check_effective_parameters(ST_CrossCovariance, defaults, lcls)

        # single trial cross-correlations
        if keeptrials:
            av_compRoutine = None   # no trial average
            norm = True   # normalize individual trials within the ST CR
        else:
            av_compRoutine = NormalizeCrossCov()
            norm = False

        # parallel computation over trials
        st_compRoutine = ST_CrossCovariance(samplerate=data.samplerate,
                                            polyremoval=polyremoval,
                                            timeAxis=timeAxis,
                                            norm=norm)
        # hard coded as class attribute
        st_dimord = ST_CrossCovariance.dimord

    # -------------------------------------------------
    # Call the chosen single trial ComputationalRoutine
    # -------------------------------------------------

    # the single trial results need a new DataSet
    st_out = CrossSpectralData(dimord=st_dimord)

    # Perform the trial-parallelized computation of the matrix quantity
    st_compRoutine.initialize(data,
                              st_out._stackingDim,
                              chan_per_worker=None,  # no parallelisation over channels possible
                              keeptrials=keeptrials)  # we most likely need trial averaging!
    st_compRoutine.compute(data, st_out, parallel=kwargs.get("parallel"), log_dict=log_dict)

    # if ever needed..
    # for single trial cross-corr results <-> keeptrials is True
    if keeptrials and av_compRoutine is None:
        if out is not None:
            msg = "Single trial processing does not support `out` argument but directly returns the results"
            SPYWarning(msg)
        return st_out

    # ----------------------------------------------------------------------------------
    # Sanitize output and call the chosen ComputationalRoutine on the averaged ST output
    # ----------------------------------------------------------------------------------

    # If provided, make sure output object is appropriate
    new_out = out is None
    if new_out:
        out = CrossSpectralData(dimord=st_dimord)
    else:
        data_parser(out, varname="out", writable=True, empty=True,
                    dataclass="CrossSpectralData",
                    dimord=st_dimord)

    # now take the trial average from the single trial CR as input
    av_compRoutine.initialize(st_out, out._stackingDim, chan_per_worker=None)
    av_compRoutine.pre_check()   # make sure we got a trial_average
    av_compRoutine.compute(st_out, out, parallel=False, log_dict=log_dict)

    # Either return newly created output object or simply quit
    return out if new_out else None
def multipanelplot(self, trials="all", channels="all", toilim=None, avg_channels=False,
                   avg_trials=True, title=None, grid=None, fig=None, **kwargs):
    """
    Plot contents of :class:`~syncopy.AnalogData` objects using multi-panel figure(s)

    Please refer to :func:`syncopy.multipanelplot` for detailed usage information.

    Returns
    -------
    fig : figure object or None
        The (new or provided) figure the data was plotted into. `None` is
        returned if the requested averaging would result in a single-panel
        plot (a warning is shown in this case).

    Notes
    -----
    The in-place selection attached to `self` for plotting is removed again on
    every exit path, i.e., also if plotting is aborted by a warning or an error.

    Examples
    --------
    Use :func:`~syncopy.tests.misc.generate_artificial_data` to create two synthetic
    :class:`~syncopy.AnalogData` objects.

    >>> from syncopy.tests.misc import generate_artificial_data
    >>> adata = generate_artificial_data(nTrials=10, nChannels=32)
    >>> bdata = generate_artificial_data(nTrials=5, nChannels=16)

    Show overview of first 5 channels, averaged across trials 2, 4, and 6:

    >>> fig = spy.multipanelplot(adata, channels=range(5), trials=[2, 4, 6])

    Overlay last 5 channels, averaged across trials 1, 3, 5:

    >>> fig = spy.multipanelplot(adata, channels=range(27, 32), trials=[1, 3, 5], fig=fig)

    Do not average trials:

    >>> fig = spy.multipanelplot(adata, channels=range(27, 32), trials=[1, 3, 5], avg_trials=False)

    Plot `adata` and `bdata` simultaneously in two separate figures:

    >>> fig1, fig2 = spy.multipanelplot(adata, bdata, channels=range(5), overlay=False)

    Overlay `adata` and `bdata`; use channel and trial selections that are valid
    for both datasets:

    >>> fig3 = spy.multipanelplot(adata, bdata, channels=range(5), trials=[1, 2, 3], avg_trials=False)

    See also
    --------
    syncopy.multipanelplot : visualize Syncopy data objects using multi-panel plots
    """

    # Collect input arguments in dict `inputArgs` and process them (this has
    # to be the first statement so that only call arguments are collected)
    inputArgs = locals()
    inputArgs.pop("self")
    dimArrs, dimCounts, idx, timeIdx, chanIdx = _prep_analog_plots(self, "multipanelplot",
                                                                   **inputArgs)

    # From here on `self` carries the selection set up above: make sure it is
    # wiped no matter how this routine is left
    try:
        (nTrials, nChan) = dimCounts
        (trList, chArr) = dimArrs

        # Get trial/channel count ("raw" plotting constitutes a special case)
        if trials is None:
            nTrials = 0
            if avg_trials:
                msg = ("`trials` is `None` but `avg_trials` is `True`. "
                       "Cannot perform trial averaging without trial specification - "
                       "setting ``avg_trials = False``. ")
                SPYWarning(msg)
                avg_trials = False
            if avg_channels:
                msg = ("Averaging across channels w/o trial specifications results in "
                       "single-panel plot. Please use `singlepanelplot` instead")
                SPYWarning(msg)
                return

        # If we're overlaying, ensure settings match up
        if hasattr(fig, "singlepanelplot"):
            lgl = "overlay-figure generated by `multipanelplot`"
            act = "figure generated by `singlepanelplot`"
            raise SPYValueError(legal=lgl, varname="fig/singlepanelplot", actual=act)
        if hasattr(fig, "nTrialPanels"):
            if nTrials != fig.nTrialPanels:
                lgl = "number of trials to plot matching existing panels in figure"
                act = "{} panels but {} trials for plotting".format(fig.nTrialPanels,
                                                                    nTrials)
                raise SPYValueError(legal=lgl, varname="trials/figure panels", actual=act)
            if avg_trials:
                lgl = "overlay of multi-trial plot"
                act = "trial averaging was requested for multi-trial plot overlay"
                raise SPYValueError(legal=lgl, varname="trials/avg_trials", actual=act)
            if trials is None:
                lgl = "`trials` to be not `None` to append to multi-trial plot"
                act = "multi-trial plot overlay was requested but `trials` is `None`"
                raise SPYValueError(legal=lgl, varname="trials/overlay", actual=act)
            if not avg_channels and not hasattr(fig, "chanOffsets"):
                lgl = "single-channel or channel-averages for appending to multi-trial plot"
                act = "multi-trial multi-channel plot overlay was requested"
                raise SPYValueError(legal=lgl, varname="avg_channels/overlay", actual=act)
        if hasattr(fig, "nChanPanels"):
            if nChan != fig.nChanPanels:
                lgl = "number of channels to plot matching existing panels in figure"
                act = "{} panels but {} channels for plotting".format(fig.nChanPanels,
                                                                      nChan)
                raise SPYValueError(legal=lgl, varname="channels/figure panels", actual=act)
            if avg_channels:
                lgl = "overlay of multi-channel plot"
                act = "channel averaging was requested for multi-channel plot overlay"
                raise SPYValueError(legal=lgl, varname="channels/avg_channels", actual=act)
            if not avg_trials:
                lgl = "overlay of multi-channel plot"
                act = "mulit-trial plot was requested for multi-channel plot overlay"
                raise SPYValueError(legal=lgl, varname="channels/avg_trials", actual=act)
        if hasattr(fig, "chanOffsets"):
            if avg_channels:
                lgl = "multi-channel plot"
                act = "channel averaging was requested for multi-channel plot overlay"
                raise SPYValueError(legal=lgl, varname="channels/avg_channels", actual=act)
            if nChan != len(fig.chanOffsets):
                lgl = "channel-count matching existing multi-channel panels in figure"
                act = "{} channels per panel but {} channels for plotting".format(
                    len(fig.chanOffsets), nChan)
                raise SPYValueError(legal=lgl, varname="channels/channels per panel", actual=act)

        # Generic title for overlay figures
        overlayTitle = "Overlay of {} datasets"

        # Either construct subplot panel layout/vet provided layout or fetch existing
        if fig is None:

            # Determine no. of required panels
            if avg_trials and not avg_channels:
                npanels = nChan
            elif not avg_trials and avg_channels:
                npanels = nTrials
            elif not avg_trials and not avg_channels:
                npanels = int(nTrials == 0) * nChan + nTrials
            else:
                msg = ("Averaging across both trials and channels results in "
                       "single-panel plot. Please use `singlepanelplot` instead")
                SPYWarning(msg)
                return

            # Although, `_setup_figure` can call `_layout_subplot_panels` for us, we
            # need `nrow` and `ncol` below, so do it here
            if nTrials > 0:
                xLabel = "Time [s]"
            else:
                xLabel = "Samples"
            nrow = kwargs.get("nrow", None)
            ncol = kwargs.get("ncol", None)
            nrow, ncol = _layout_subplot_panels(npanels, nrow=nrow, ncol=ncol)
            fig, ax_arr = _setup_figure(npanels, nrow=nrow, ncol=ncol,
                                        xLabel=xLabel, grid=grid)
            fig.analogPlot = True

        # Get existing layout
        else:
            ax_arr = fig.get_axes()
            # NOTE(review): `numRows`/`numCols` appear to be deprecated in newer
            # matplotlib releases - confirm against the supported version
            nrow, ncol = ax_arr[0].numRows, ax_arr[0].numCols

        # Panels correspond to channels
        if avg_trials and not avg_channels:

            # Ensure provided timing selection can actually be averaged (leverage
            # the fact that `toilim` selections exclusively generate slices)
            tLengths = _prep_toilim_avg(self)

            # Compute trial-averaged time-courses: 2D array with slice/list
            # selection does not require fancy indexing - no need to check this here
            pltArr = np.zeros((tLengths[0], nChan), dtype=self.data.dtype)
            for k, trlno in enumerate(trList):
                idx[timeIdx] = self._selection.time[k]
                pltArr += np.swapaxes(self._get_trial(trlno)[tuple(idx)], timeIdx, 0)
            pltArr /= nTrials

            # Cycle through channels and plot trial-averaged time-courses (time-
            # axis must be identical for all channels, set up `idx` just once).
            # The time-axis is taken from the first selected trial, matching
            # the first trial's time-selection
            idx[timeIdx] = self._selection.time[0]
            time = self.time[trList[0]][self._selection.time[0]]
            for k, chan in enumerate(chArr):
                ax_arr[k].plot(time, pltArr[:, k], label=os.path.basename(self.filename))

            # If we're overlaying datasets, adjust panel- and sup-titles: include
            # legend in top-right axis (note: `ax_arr` is row-major flattened)
            if fig.objCount == 0:
                for k, chan in enumerate(chArr):
                    ax_arr[k].set_title(chan, size=pltConfig["multiTitleSize"])
                fig.nChanPanels = nChan
                if title is None:
                    if nTrials > 1:
                        title = "Average of {} trials".format(nTrials)
                    else:
                        title = "Trial #{}".format(trList[0])
                fig.suptitle(title, size=pltConfig["singleTitleSize"])
            else:
                for k, chan in enumerate(chArr):
                    ax_arr[k].set_title("{0}/{1}".format(ax_arr[k].get_title(), chan))
                ax = ax_arr[ncol - 1]
                handles, labels = ax.get_legend_handles_labels()
                ax.legend(handles, labels)
                if title is None:
                    title = overlayTitle.format(len(handles))
                fig.suptitle(title, size=pltConfig["singleTitleSize"])

        # Panels correspond to trials
        elif not avg_trials and avg_channels:

            # Cycle through panels to plot by-trial channel-averages
            for k, trlno in enumerate(trList):
                idx[timeIdx] = self._selection.time[k]
                time = self.time[trList[k]][self._selection.time[k]]
                ax_arr[k].plot(time,
                               self._get_trial(trlno)[tuple(idx)].mean(axis=chanIdx).squeeze(),
                               label=os.path.basename(self.filename))

            # If we're overlaying datasets, adjust panel- and sup-titles: include
            # legend in top-right axis (note: `ax_arr` is row-major flattened)
            if fig.objCount == 0:
                for k, trlno in enumerate(trList):
                    ax_arr[k].set_title("Trial #{}".format(trlno),
                                        size=pltConfig["multiTitleSize"])
                fig.nTrialPanels = nTrials
                if title is None:
                    if nChan > 1:
                        title = "Average of {} channels".format(nChan)
                    else:
                        title = chArr[0]
                fig.suptitle(title, size=pltConfig["singleTitleSize"])
            else:
                for k, trlno in enumerate(trList):
                    ax_arr[k].set_title("{0}/#{1}".format(ax_arr[k].get_title(), trlno))
                ax = ax_arr[ncol - 1]
                handles, labels = ax.get_legend_handles_labels()
                ax.legend(handles, labels)
                if title is None:
                    title = overlayTitle.format(len(handles))
                fig.suptitle(title, size=pltConfig["singleTitleSize"])

        # Panels correspond to channels (if `trials` is `None`) otherwise trials
        elif not avg_trials and not avg_channels:

            # Plot each channel in separate panel
            if nTrials == 0:
                chanSec = np.arange(self.channel.size)[self._selection.channel]
                for k, chan in enumerate(chanSec):
                    idx[chanIdx] = chan
                    ax_arr[k].plot(self.data[tuple(idx)].squeeze(),
                                   label=os.path.basename(self.filename))

                # If we're overlaying datasets, adjust panel- and sup-titles: include
                # legend in top-right axis (note: `ax_arr` is row-major flattened)
                if fig.objCount == 0:
                    for k, chan in enumerate(chArr):
                        ax_arr[k].set_title(chan, size=pltConfig["multiTitleSize"])
                    fig.nChanPanels = nChan
                    if title is None:
                        title = "Entire Data Timecourse"
                    fig.suptitle(title, size=pltConfig["singleTitleSize"])
                else:
                    for k, chan in enumerate(chArr):
                        ax_arr[k].set_title("{0}/{1}".format(ax_arr[k].get_title(), chan))
                    ax = ax_arr[ncol - 1]
                    handles, labels = ax.get_legend_handles_labels()
                    ax.legend(handles, labels)
                    if title is None:
                        title = overlayTitle.format(len(handles))
                    fig.suptitle(title, size=pltConfig["singleTitleSize"])

            # Each trial gets its own panel w/multiple channels per panel
            else:

                # If required, compute max amplitude across provided trials + channels
                if not hasattr(fig, "chanOffsets"):
                    maxAmps = np.zeros((nTrials,), dtype=self.data.dtype)
                    tickOffsets = maxAmps.copy()
                    for k, trlno in enumerate(trList):
                        idx[timeIdx] = self._selection.time[k]
                        pltArr = np.abs(self._get_trial(trlno)[tuple(idx)])
                        maxAmps[k] = pltArr.max()
                        tickOffsets[k] = pltArr.mean()
                    fig.chanOffsets = np.cumsum([0] + [maxAmps.max()] * (nChan - 1))
                    fig.tickOffsets = fig.chanOffsets + tickOffsets.mean()

                # One color per dataset; wrap around if more datasets are
                # overlaid than the color cycle provides
                colors = plt.rcParams["axes.prop_cycle"].by_key()["color"]
                lineColor = colors[fig.objCount % len(colors)]

                # Cycle through panels to plot by-trial multi-channel time-courses
                for k, trlno in enumerate(trList):
                    idx[timeIdx] = self._selection.time[k]
                    time = self.time[trList[k]][self._selection.time[k]]
                    pltArr = np.swapaxes(self._get_trial(trlno)[tuple(idx)], timeIdx, 0)
                    ax_arr[k].plot(time,
                                   (pltArr + fig.chanOffsets.reshape(1, nChan)).reshape(time.size, nChan),
                                   color=lineColor,
                                   label=os.path.basename(self.filename))

                # If we're overlaying datasets, adjust panel- and sup-titles: include
                # legend in top-right axis (note: `ax_arr` is row-major flattened)
                # Note: y-axis is shared across panels, so `yticks` need only be set once
                if fig.objCount == 0:
                    for k, trlno in enumerate(trList):
                        ax_arr[k].set_title("Trial #{}".format(trlno),
                                            size=pltConfig["multiTitleSize"])
                    ax_arr[0].set_yticks(fig.tickOffsets)
                    ax_arr[0].set_yticklabels(chArr)
                    fig.nTrialPanels = nTrials
                    if title is None:
                        if nChan > 1:
                            title = "{} channels".format(nChan)
                        else:
                            title = chArr[0]
                    fig.suptitle(title, size=pltConfig["singleTitleSize"])
                else:
                    for k, trlno in enumerate(trList):
                        ax_arr[k].set_title("{0}/#{1}".format(ax_arr[k].get_title(), trlno))
                    ax_arr[0].set_yticklabels([" "] * chArr.size)
                    ax = ax_arr[ncol - 1]
                    handles, labels = ax.get_legend_handles_labels()
                    # Every dataset contributes `nChan` identically labeled
                    # lines per panel: keep one legend entry per dataset
                    dsHandles = handles[::nChan]
                    dsLabels = labels[::nChan]
                    ax.legend(dsHandles, dsLabels)
                    if title is None:
                        title = overlayTitle.format(len(dsHandles))
                    fig.suptitle(title, size=pltConfig["singleTitleSize"])

        # Increment overlay-counter and draw figure
        fig.objCount += 1
        plt.draw()
        return fig

    finally:
        # Wipe data-selection slot (also on early exits and errors)
        self._selection = None
def data_parser(data, varname="", dataclass=None, writable=None, empty=None, dimord=None):
    """
    Parse syncopy data objects

    Parameters
    ----------
    data : syncopy data object
        Syncopy data object to verify
    varname : str
        Local variable name used in caller, see Examples for details.
    dataclass : None or str
        Expected class name of `data`. If `None` the type of `data` is not
        inspected.
    writable : None or bool
        If `True` a :class:`~syncopy.shared.errors.SPYValueError` is raised if
        `data` is read-only (``data.mode == "r"``), vice versa if `writable`
        is `False`. If `None` then `data` is not checked for read/write access.
    empty : None or bool
        If `True` a :class:`~syncopy.shared.errors.SPYValueError` is raised if
        `data` already has contents, if `False` then `data` is checked for
        non-emptiness. If `None` then `data` is not inspected for contents.
    dimord : None or list
        If provided, then `data.dimord` is matched with `dimord` (raising a
        :class:`~syncopy.shared.errors.SPYValueError` in case of discrepancies).
        If `None` then `data.dimord` is not inspected.

    Returns
    -------
    Nothing : None

    Examples
    --------
    Ensure `adata` is a :class:`~syncopy.datatype.continuous_data.AnalogData` object:

    >>> data_parser(adata, varname="adata", dataclass="AnalogData")

    Query adata for write-access and emptiness (e.g., before writing results):

    >>> data_parser(adata, varname="adata", writable=True, empty=True)

    See also
    --------
    array_parser : similar functionality for parsing array-like objects
    """

    # `data` qualifies if any class in its MRO mentions `BaseData`
    derivesFromBase = False
    for klass in data.__class__.__mro__:
        if "BaseData" in str(klass):
            derivesFromBase = True
            break
    if not derivesFromBase:
        raise SPYTypeError(data, varname=varname, expected="Syncopy data object")

    # Optional: class name has to match exactly
    if dataclass is not None and data.__class__.__name__ != str(dataclass):
        expected = "Syncopy {} object".format(dataclass)
        raise SPYTypeError(data, varname=varname, expected=expected)

    # Optional: object has to be empty/non-empty
    if empty is not None:
        statusTmpl = "{status:s} Syncopy data object"
        hasNoContents = data._is_empty()
        if empty and not hasNoContents:
            raise SPYValueError(legal=statusTmpl.format(status="empty"),
                                varname=varname,
                                actual="non-empty")
        if not empty and hasNoContents:
            raise SPYValueError(legal=statusTmpl.format(status="non-empty"),
                                varname=varname,
                                actual="empty")

    # Optional: object has to be writable/read-only
    if writable is not None:
        accessTmpl = "{access:s} to Syncopy data object"
        modeTmpl = "mode = {mode:s}"
        readOnly = data.mode == "r"
        if writable and readOnly:
            raise SPYValueError(legal=accessTmpl.format(access="write-access"),
                                varname=varname,
                                actual=modeTmpl.format(mode=data.mode))
        if not writable and not readOnly:
            raise SPYValueError(legal=accessTmpl.format(access="read-only-access"),
                                varname=varname,
                                actual=modeTmpl.format(mode=data.mode))

    # Optional: dimensional layout has to match
    if dimord is not None and data.dimord != dimord:
        objTmpl = "Syncopy {diminfo:s} data object"
        wanted = "'" + "' x '".join(str(dim) for dim in dimord) + "'"
        if data.dimord:
            found = "'" + "' x '".join(str(dim) for dim in data.dimord) + "' "
        else:
            found = "empty"
        raise SPYValueError(legal=objTmpl.format(diminfo=wanted),
                            varname=varname,
                            actual=objTmpl.format(diminfo=found))

    return None
def selectdata(data, trials=None, channels=None, channels_i=None, channels_j=None,
               toi=None, toilim=None, foi=None, foilim=None, tapers=None, units=None,
               eventids=None, out=None, inplace=False, clear=False, **kwargs):
    """
    Create a new Syncopy object from a selection

    **Usage Notice**

    Syncopy offers two modes for selecting data:

    * **in-place** selections mark subsets of a Syncopy data object for processing
      via a ``select`` dictionary *without* creating a new object
    * **deep-copy** selections copy subsets of a Syncopy data object to keep and
      preserve in a new object created by :func:`~syncopy.selectdata`

    All Syncopy metafunctions, such as :func:`~syncopy.freqanalysis`, support
    **in-place** data selection via a ``select`` keyword, effectively avoiding
    potentially slow copy operations and saving disk space. The keys accepted
    by the `select` dictionary are identical to the keyword arguments discussed
    below. In addition, ``select = "all"`` can be used to select entire object
    contents. Examples

    >>> select = {"toilim" : [-0.25, 0]}
    >>> spy.freqanalysis(data, select=select)
    >>> # or equivalently
    >>> cfg = spy.get_defaults(spy.freqanalysis)
    >>> cfg.select = select
    >>> spy.freqanalysis(cfg, data)

    **Usage Summary**

    List of Syncopy data objects and respective valid data selectors:

    :class:`~syncopy.AnalogData` : trials, channels, toi/toilim
        Examples

        >>> spy.selectdata(data, trials=[0, 3, 5], channels=["channel01", "channel02"])
        >>> cfg = spy.StructDict()
        >>> cfg.trials = [5, 3, 0]; cfg.toilim = [0.25, 0.5]
        >>> spy.selectdata(cfg, data)

    :class:`~syncopy.SpectralData` : trials, channels, toi/toilim, foi/foilim, tapers
        Examples

        >>> spy.selectdata(data, trials=[0, 3, 5], channels=["channel01", "channel02"])
        >>> cfg = spy.StructDict()
        >>> cfg.foi = [30, 40, 50]; cfg.tapers = slice(2, 4)
        >>> spy.selectdata(cfg, data)

    :class:`~syncopy.EventData` : trials, toi/toilim, eventids
        Examples

        >>> spy.selectdata(data, toilim=[-1, 2.5], eventids=[0, 1])
        >>> cfg = spy.StructDict()
        >>> cfg.trials = [0, 0, 1, 0]; cfg.eventids = slice(2, None)
        >>> spy.selectdata(cfg, data)

    :class:`~syncopy.SpikeData` : trials, toi/toilim, units, channels
        Examples

        >>> spy.selectdata(data, toilim=[-1, 2.5], units=range(0, 10))
        >>> cfg = spy.StructDict()
        >>> cfg.toi = [1.25, 3.2]; cfg.trials = [0, 1, 2, 3]
        >>> spy.selectdata(cfg, data)

    **Note** Any property that is not specifically accessed via one of the provided
    selectors is taken as is, e.g., ``spy.selectdata(data, trials=[1, 2])``
    selects the entire contents of trials no. 2 and 3, while
    ``spy.selectdata(data, channels=range(0, 50))`` selects the first 50 channels
    of `data` across all defined trials. Consequently, if no keywords are specified,
    the entire contents of `data` is selected.

    **Full documentation below**

    Parameters
    ----------
    data : Syncopy data object
        A non-empty Syncopy data object. **Note** the type of `data` determines
        which keywords can be used. Some keywords are only valid for certain types
        of Syncopy objects, e.g., `foi` is not a valid selector for an
        :class:`~syncopy.AnalogData` object.
    trials : list (integers) or None or "all"
        List of integers representing trial numbers to be selected; can include
        repetitions and need not be sorted (e.g., ``trials = [0, 1, 0, 0, 2]``
        is valid) but must be finite and not NaN. If `trials` is `None`, or
        ``trials = "all"`` all trials are selected.
    channels : list (integers or strings), slice, range or None or "all"
        Channel-selection; can be a list of channel names
        (``['channel3', 'channel1']``), a list of channel indices (``[3, 5]``),
        a slice (``slice(3, 10)``) or range (``range(3, 10)``). Note that
        following Python conventions, channels are counted starting at zero, and
        range and slice selections are half-open intervals of the form
        `[low, high)`, i.e., low is included, high is excluded. Thus,
        ``channels = [0, 1, 2]`` or ``channels = slice(0, 3)`` selects the first
        up to (and including) the third channel. Selections can be unsorted and
        may include repetitions but must match exactly, be finite and not NaN.
        If `channels` is `None`, or ``channels = "all"`` all channels are selected.
    channels_i, channels_j : None or channel selectors
        Additional channel selectors that are handed to the selection machinery
        unchanged. Providing either one triggers a warning, since channel
        selection for `CrossSpectralData` objects is currently untested and
        experimental.
    toi : list (floats) or None or "all"
        Time-points to be selected (in seconds) in each trial. Timing is expected
        to be on a by-trial basis (e.g., relative to trigger onsets). Selections
        can be approximate, unsorted and may include repetitions but must be
        finite and not NaN. Fuzzy matching is performed for approximate selections
        (i.e., selected time-points are close but not identical to timing
        information found in `data`) using a nearest-neighbor search for elements
        of `toi`. If `toi` is `None` or ``toi = "all"``, the entire time-span in
        each trial is selected.
    toilim : list (floats [tmin, tmax]) or None or "all"
        Time-window ``[tmin, tmax]`` (in seconds) to be extracted from each trial.
        Window specifications must be sorted (e.g., ``[2.2, 1.1]`` is invalid)
        and not NaN but may be unbounded (e.g., ``[1.1, np.inf]`` is valid). Edges
        `tmin` and `tmax` are included in the selection. If `toilim` is `None`
        or ``toilim = "all"``, the entire time-span in each trial is selected.
    foi : list (floats) or None or "all"
        Frequencies to be selected (in Hz). Selections can be approximate,
        unsorted and may include repetitions but must be finite and not NaN.
        Fuzzy matching is performed for approximate selections (i.e., selected
        frequencies are close but not identical to frequencies found in `data`)
        using a nearest-neighbor search for elements of `foi` in `data.freq`.
        If `foi` is `None` or ``foi = "all"``, all frequencies are selected.
    foilim : list (floats [fmin, fmax]) or None or "all"
        Frequency-window ``[fmin, fmax]`` (in Hz) to be extracted. Window
        specifications must be sorted (e.g., ``[90, 70]`` is invalid) and not NaN
        but may be unbounded (e.g., ``[-np.inf, 60.5]`` is valid). Edges `fmin`
        and `fmax` are included in the selection. If `foilim` is `None` or
        ``foilim = "all"``, all frequencies are selected.
    tapers : list (integers or strings), slice, range or None or "all"
        Taper-selection; can be a list of taper names
        (``['dpss-win-1', 'dpss-win-3']``), a list of taper indices (``[3, 5]``),
        a slice (``slice(3, 10)``) or range (``range(3, 10)``). Note that
        following Python conventions, tapers are counted starting at zero, and
        range and slice selections are half-open intervals of the form
        `[low, high)`, i.e., low is included, high is excluded. Thus,
        ``tapers = [0, 1, 2]`` or ``tapers = slice(0, 3)`` selects the first up
        to (and including) the third taper. Selections can be unsorted and may
        include repetitions but must match exactly, be finite and not NaN. If
        `tapers` is `None` or ``tapers = "all"``, all tapers are selected.
    units : list (integers or strings), slice, range or None or "all"
        Unit-selection; can be a list of unit names (``['unit10', 'unit3']``),
        a list of unit indices (``[3, 5]``), a slice (``slice(3, 10)``) or range
        (``range(3, 10)``). Note that following Python conventions, units are
        counted starting at zero, and range and slice selections are half-open
        intervals of the form `[low, high)`, i.e., low is included, high is
        excluded. Thus, ``units = [0, 1, 2]`` or ``units = slice(0, 3)`` selects
        the first up to (and including) the third unit. Selections can be
        unsorted and may include repetitions but must match exactly, be finite
        and not NaN. If `units` is `None` or ``units = "all"``, all units are
        selected.
    eventids : list (integers), slice, range or None or "all"
        Event-ID-selection; can be a list of event-id codes (``[2, 0, 1]``),
        slice (``slice(0, 2)``) or range (``range(0, 2)``). Note that following
        Python conventions, range and slice selections are half-open intervals
        of the form `[low, high)`, i.e., low is included, high is excluded.
        Selections can be unsorted and may include repetitions but must match
        exactly, be finite and not NaN. If `eventids` is `None` or
        ``eventids = "all"``, all events are selected.
    out : None or Syncopy data object
        If `None` (default) a new object of the same class and `dimord` as
        `data` is created and returned. Otherwise `out` has to be an empty,
        writable Syncopy data object of the same class and `dimord` as `data`;
        it is filled with the selection and nothing is returned. Must be `None`
        if `inplace` is `True`.
    inplace : bool
        If `inplace` is `True` **no** new object is created. Instead the provided
        selection is stored in the input object's `_selection` attribute for later
        use. By default `inplace` is `False` and all calls to `selectdata` create
        a new Syncopy data object.
    clear : bool
        If `True`, an existing in-place selection attached to `data` is removed
        and the routine returns. No data selectors may be provided together
        with ``clear = True``.
    **kwargs : dict
        Additional keywords. The entries `chan_per_worker` and `parallel` are
        forwarded to the copy routine; all provided keywords are included in
        the dictionary handed to the copy routine for logging.

    Returns
    -------
    dataselection : Syncopy data object or None
        Syncopy data object of the same type as `data` but containing only the
        subset specified by provided selectors. `None` is returned if the
        selection was performed in-place, if an existing selection was cleared
        or if an output object was provided via `out`.

    Raises
    ------
    SPYTypeError
        If `inplace` or `clear` is not Boolean (errors raised by the parsing
        of `data` and `out` are passed on unchanged).
    SPYValueError
        If `out` is provided together with ``inplace = True`` or if data
        selectors are provided together with ``clear = True``.

    Notes
    -----
    This routine represents a convenience function for creating new Syncopy objects
    based on existing data entities. However, in many situations, the creation
    of a new object (and thus the allocation of additional disk-space) might not
    be necessary: all Syncopy metafunctions, such as :func:`~syncopy.freqanalysis`,
    support **in-place** data selection.

    Consider the following example: assume `data` is an :class:`~syncopy.AnalogData`
    object representing 220 trials of LFP recordings containing baseline (between
    second -0.25 and 0) and stimulus-on data (on the interval [0.25, 0.5]).
    To compute the baseline spectrum, data-selection does **not**
    have to be performed before calling :func:`~syncopy.freqanalysis` but instead
    can be done in-place:

    >>> import syncopy as spy
    >>> cfg = spy.get_defaults(spy.freqanalysis)
    >>> cfg.method = 'mtmfft'
    >>> cfg.taper = 'dpss'
    >>> cfg.output = 'pow'
    >>> cfg.tapsmofrq = 10
    >>> # define baseline/stimulus-on ranges
    >>> baseSelect = {"toilim": [-0.25, 0]}
    >>> stimSelect = {"toilim": [0.25, 0.5]}
    >>> # in-place selection of baseline interval performed by `freqanalysis`
    >>> cfg.select = baseSelect
    >>> baselineSpectrum = spy.freqanalysis(cfg, data)
    >>> # in-place selection of stimulus-on time-frame performed by `freqanalysis`
    >>> cfg.select = stimSelect
    >>> stimonSpectrum = spy.freqanalysis(cfg, data)

    Especially for large data-sets, in-place data selection performed by Syncopy's
    metafunctions does not only save disk-space but can significantly increase
    performance.

    Examples
    --------
    Use :func:`~syncopy.tests.misc.generate_artificial_data` to create a synthetic
    :class:`syncopy.AnalogData` object.

    >>> from syncopy.tests.misc import generate_artificial_data
    >>> adata = generate_artificial_data(nTrials=10, nChannels=32)

    Assume a hypothetical trial onset at second 2.0 with the first second of each
    trial representing baseline recordings. To extract only the stimulus-on period
    from `adata`, one could use

    >>> stimon = spy.selectdata(adata, toilim=[2.0, np.inf])

    Note that this is equivalent to

    >>> stimon = adata.selectdata(toilim=[2.0, np.inf])

    See also
    --------
    :func:`syncopy.show` : Show (subsets) of Syncopy objects
    """

    # Ensure our one mandatory input is usable
    data_parser(data, varname="data", empty=False)

    # Vet the only inputs not checked by `Selector`
    if not isinstance(inplace, bool):
        raise SPYTypeError(inplace, varname="inplace", expected="Boolean")
    if not isinstance(clear, bool):
        raise SPYTypeError(clear, varname="clear", expected="Boolean")

    # If provided, make sure output object is appropriate. A new output object
    # is only allocated further below, right before it is actually needed, so
    # that early exits (`clear`/`inplace`) do not construct an unused object
    new_out = False
    if inplace:
        if out is not None:
            lgl = "no output object for in-place selection"
            raise SPYValueError(lgl, varname="out", actual=out.__class__.__name__)
    elif out is not None:
        data_parser(out, varname="out", writable=True, empty=True,
                    dataclass=data.__class__.__name__,
                    dimord=data.dimord)
    else:
        new_out = True

    # FIXME: remove once tests are in place (cf #165)
    if channels_i is not None or channels_j is not None:
        SPYWarning("CrossSpectralData channel selection currently untested and experimental!")

    # Collect provided keywords in dict
    selectDict = {"trials": trials,
                  "channels": channels,
                  "channels_i": channels_i,
                  "channels_j": channels_j,
                  "toi": toi,
                  "toilim": toilim,
                  "foi": foi,
                  "foilim": foilim,
                  "tapers": tapers,
                  "units": units,
                  "eventids": eventids}

    # First simplest case: determine whether we just need to clear an existing selection
    if clear:
        if any(value is not None for value in selectDict.values()):
            lgl = "no data selectors if `clear = True`"
            raise SPYValueError(lgl, varname="select", actual=selectDict)
        if data._selection is None:
            SPYInfo("No in-place selection found. ")
        else:
            data._selection = None
            SPYInfo("In-place selection cleared")
        return

    # Pass provided selections on to `Selector` class which performs error checking
    data._selection = selectDict

    # If an in-place selection was requested we're done
    if inplace:
        SPYInfo("In-place selection attached to data object: {}".format(data._selection))
        return

    try:
        if new_out:
            out = data.__class__(dimord=data.dimord)

        # Create inventory of all available selectors and actually provided values
        # to create a bookkeeping dict for logging
        log_dct = {"inplace": inplace, "clear": clear}
        log_dct.update(selectDict)
        log_dct.update(**kwargs)

        # Fire up `ComputationalRoutine`-subclass to do the actual selecting/copying
        selectMethod = DataSelection()
        selectMethod.initialize(data, out._stackingDim,
                                chan_per_worker=kwargs.get("chan_per_worker"))
        selectMethod.compute(data, out, parallel=kwargs.get("parallel"),
                             log_dict=log_dct)
    finally:
        # Wipe data-selection slot to not alter input object; done in `finally`
        # so that a failing computation does not leave the temporary selection
        # attached to `data`
        data._selection = None

    # Either return newly created output object or simply quit
    return out if new_out else None
def freqanalysis(data, method='mtmfft', output='fourier', keeptrials=True, foi=None, foilim=None, pad_to_length=None, polyremoval=None, taper="hann", tapsmofrq=None, nTaper=None, keeptapers=False, toi="all", t_ftimwin=None, wavelet="Morlet", width=6, order=None, order_max=None, order_min=1, c_1=3, adaptive=False, out=None, **kwargs): """ Perform (time-)frequency analysis of Syncopy :class:`~syncopy.AnalogData` objects **Usage Summary** Options available in all analysis methods: * **output** : one of :data:`~syncopy.specest.const_def.availableOutputs`; return power spectra, complex Fourier spectra or absolute values. * **foi**/**foilim** : frequencies of interest; either array of frequencies or frequency window (not both) * **keeptrials** : return individual trials or grand average * **polyremoval** : de-trending method to use (0 = mean, 1 = linear or `None`) List of available analysis methods and respective distinct options: "mtmfft" : (Multi-)tapered Fourier transform Perform frequency analysis on time-series trial data using either a single taper window (Hanning) or many tapers based on the discrete prolate spheroidal sequence (DPSS) that maximize energy concentration in the main lobe. * **taper** : one of :data:`~syncopy.shared.const_def.availableTapers` * **tapsmofrq** : spectral smoothing box for slepian tapers (in Hz) * **nTaper** : number of orthogonal tapers for slepian tapers * **keeptapers** : return individual tapers or average * **pad_to_length**: either pad to an absolute length or set to `'nextpow2'` "mtmconvol" : (Multi-)tapered sliding window Fourier transform Perform time-frequency analysis on time-series trial data based on a sliding window short-time Fourier transform using either a single Hanning taper or multiple DPSS tapers. 
* **taper** : one of :data:`~syncopy.specest.const_def.availableTapers` * **tapsmofrq** : spectral smoothing box for slepian tapers (in Hz) * **nTaper** : number of orthogonal tapers for slepian tapers * **keeptapers** : return individual tapers or average * **toi** : time-points of interest; can be either an array representing analysis window centroids (in sec), a scalar between 0 and 1 encoding the percentage of overlap between adjacent windows or "all" to center a window on every sample in the data. * **t_ftimwin** : sliding window length (in sec) "wavelet" : (Continuous non-orthogonal) wavelet transform Perform time-frequency analysis on time-series trial data using a non-orthogonal continuous wavelet transform. * **wavelet** : one of :data:`~syncopy.specest.const_def.availableWavelets` * **toi** : time-points of interest; can be either an array representing time points (in sec) or "all"(pre-trimming and subsampling of results) * **width** : Nondimensional frequency constant of Morlet wavelet function (>= 6) * **order** : Order of Paul wavelet function (>= 4) or derivative order of real-valued DOG wavelets (2 = mexican hat) "superlet" : Superlet transform Perform time-frequency analysis on time-series trial data using the super-resolution superlet transform (SLT) from [Moca2021]_. * **order_max** : Maximal order of the superlet * **order_min** : Minimal order of the superlet * **c_1** : Number of cycles of the base Morlet wavelet * **adaptive** : If set to `True` perform fractional adaptive SLT, otherwise perform multiplicative SLT **Full documentation below** Parameters ---------- data : `~syncopy.AnalogData` A non-empty Syncopy :class:`~syncopy.datatype.AnalogData` object method : str Spectral estimation method, one of :data:`~syncopy.specest.const_def.availableMethods` (see below). output : str Output of spectral estimation. 
One of :data:`~syncopy.specest.const_def.availableOutputs` (see below); use `'pow'` for power spectrum (:obj:`numpy.float32`), `'fourier'` for complex Fourier coefficients (:obj:`numpy.complex64`) or `'abs'` for absolute values (:obj:`numpy.float32`). keeptrials : bool If `True` spectral estimates of individual trials are returned, otherwise results are averaged across trials. foi : array-like or None Frequencies of interest (Hz) for output. If desired frequencies cannot be matched exactly, the closest possible frequencies are used. If `foi` is `None` or ``foi = "all"``, all attainable frequencies (i.e., zero to Nyquist / 2) are selected. foilim : array-like (floats [fmin, fmax]) or None or "all" Frequency-window ``[fmin, fmax]`` (in Hz) of interest. Window specifications must be sorted (e.g., ``[90, 70]`` is invalid) and not NaN but may be unbounded (e.g., ``[-np.inf, 60.5]`` is valid). Edges `fmin` and `fmax` are included in the selection. If `foilim` is `None` or ``foilim = "all"``, all frequencies are selected. pad_to_length : int, None or 'nextpow2' Padding of the input data, if set to a number pads all trials to this absolute length. For instance ``pad_to_length = 2000`` pads all trials to an absolute length of 2000 samples, if and only if the longest trial contains at maximum 2000 samples. Alternatively if all trials have the same initial lengths setting `pad_to_length='nextpow2'` pads all trials to the next power of two. If `None` and trials have unequal lengths all trials are padded to match the longest trial. polyremoval : int or None Order of polynomial used for de-trending data in the time domain prior to spectral analysis. A value of 0 corresponds to subtracting the mean ("de-meaning"), ``polyremoval = 1`` removes linear trends (subtracting the least squares fit of a linear polynomial). If `polyremoval` is `None`, no de-trending is performed. Note that for spectral estimation de-meaning is very advisable and hence also the default. 
taper : str Only valid if `method` is `'mtmfft'` or `'mtmconvol'`. Windowing function, one of :data:`~syncopy.specest.const_def.availableTapers` (see below). tapsmofrq : float Only valid if `method` is `'mtmfft'` or `'mtmconvol'` and `taper` is `'dpss'`. The amount of spectral smoothing through multi-tapering (Hz). Note that smoothing frequency specifications are one-sided, i.e., 4 Hz smoothing means plus-minus 4 Hz, i.e., a 8 Hz smoothing box. nTaper : int or None Only valid if `method` is `'mtmfft'` or `'mtmconvol'` and `taper='dpss'`. Number of orthogonal tapers to use. It is not recommended to set the number of tapers manually! Leave at `None` for the optimal number to be set automatically. keeptapers : bool Only valid if `method` is `'mtmfft'` or `'mtmconvol'`. If `True`, return spectral estimates for each taper. Otherwise power spectrum is averaged across tapers, if and only if `output` is `pow`. toi : float or array-like or "all" **Mandatory input** for time-frequency analysis methods (`method` is either `"mtmconvol"` or `"wavelet"` or `"superlet"`). If `toi` is scalar, it must be a value between 0 and 1 indicating the percentage of overlap between time-windows specified by `t_ftimwin` (only valid if `method` is `'mtmconvol'`). If `toi` is an array it explicitly selects the centroids of analysis windows (in seconds), if `toi` is `"all"`, analysis windows are centered on all samples in the data for `method="mtmconvol"`. For wavelet based methods (`"wavelet"` or `"superlet"`) toi needs to be either an equidistant array of time points or "all". t_ftimwin : positive float Only valid if `method` is `'mtmconvol'`. Sliding window length (in seconds). wavelet : str Only valid if `method` is `'wavelet'`. Wavelet function to use, one of :data:`~syncopy.specest.const_def.availableWavelets` (see below). width : positive float Only valid if `method` is `'wavelet'` and `wavelet` is `'Morlet'`. Nondimensional frequency constant of Morlet wavelet function. 
This number should be >= 6, which corresponds to 6 cycles within the analysis window to ensure sufficient spectral sampling. order : positive int Only valid if `method` is `'wavelet'` and `wavelet` is `'Paul'` or `'DOG'`. Order of the wavelet function. If `wavelet` is `'Paul'`, `order` should be chosen >= 4 to ensure that the analysis window contains at least a single oscillation. At an order of 40, the Paul wavelet exhibits about the same number of cycles as the Morlet wavelet with a `width` of 6. All other supported wavelets functions are *real-valued* derivatives of Gaussians (DOGs). Hence, if `wavelet` is `'DOG'`, `order` represents the derivative order. The special case of a second order DOG yields a function known as "Mexican Hat", "Marr" or "Ricker" wavelet, which can be selected alternatively by setting `wavelet` to `'Mexican_hat'`, `'Marr'` or `'Ricker'`. **Note**: A real-valued wavelet function encodes *only* information about peaks and discontinuities in the signal and does *not* provide any information about amplitude or phase. order_max : int Only valid if `method` is `'superlet'`. Maximal order of the superlet set. Controls the maximum number of cycles within a SL together with the `c_1` parameter: c_max = c_1 * order_max order_min : int Only valid if `method` is `'superlet'`. Minimal order of the superlet set. Controls the minimal number of cycles within a SL together with the `c_1` parameter: c_min = c_1 * order_min Note that for admissability reasons c_min should be at least 3! c_1 : int Only valid if `method` is `'superlet'`. Number of cycles of the base Morlet wavelet. If set to lower than 3 increase `order_min` as to never have less than 3 cycles in a wavelet! adaptive : bool Only valid if `method` is `'superlet'`. Wether to perform multiplicative SLT or fractional adaptive SLT. If set to True, the order of the wavelet set will increase linearly with the frequencies of interest from `order_min` to `order_max`. 
If set to False the same SL will be used for all frequencies. out : None or :class:`SpectralData` object None if a new :class:`SpectralData` object is to be created, or an empty :class:`SpectralData` object Returns ------- spec : :class:`~syncopy.SpectralData` (Time-)frequency spectrum of input data Notes ----- .. [Moca2021] Moca, Vasile V., et al. "Time-frequency super-resolution with superlets." Nature communications 12.1 (2021): 1-18. **Options** .. autodata:: syncopy.specest.const_def.availableMethods .. autodata:: syncopy.specest.const_def.availableOutputs .. autodata:: syncopy.specest.const_def.availableTapers .. autodata:: syncopy.specest.const_def.availableWavelets Examples -------- Coming soon... See also -------- syncopy.specest.mtmfft.mtmfft : (multi-)tapered Fourier transform of multi-channel time series data syncopy.specest.mtmconvol.mtmconvol : time-frequency analysis of multi-channel time series data with a sliding window FFT syncopy.specest.wavelet.wavelet : time-frequency analysis of multi-channel time series data using a wavelet transform numpy.fft.fft : NumPy's reference FFT implementation scipy.signal.stft : SciPy's Short Time Fourier Transform """ # Make sure our one mandatory input object can be processed try: data_parser(data, varname="data", dataclass="AnalogData", writable=None, empty=False) except Exception as exc: raise exc timeAxis = data.dimord.index("time") # Get everything of interest in local namespace defaults = get_defaults(freqanalysis) lcls = locals() # check for ineffective additional kwargs check_passed_kwargs(lcls, defaults, frontend_name="freqanalysis") # Ensure a valid computational method was selected if method not in availableMethods: lgl = "'" + "or '".join(opt + "' " for opt in availableMethods) raise SPYValueError(legal=lgl, varname="method", actual=method) # Ensure a valid output format was selected if output not in spectralConversions.keys(): lgl = "'" + "or '".join(opt + "' " for opt in spectralConversions.keys()) 
raise SPYValueError(legal=lgl, varname="output", actual=output) # Parse all Boolean keyword arguments for vname in ["keeptrials", "keeptapers"]: if not isinstance(lcls[vname], bool): raise SPYTypeError(lcls[vname], varname=vname, expected="Bool") # If only a subset of `data` is to be processed, make some necessary adjustments # of the sampleinfo and trial lengths if data._selection is not None: sinfo = data._selection.trialdefinition[:, :2] trialList = data._selection.trials else: trialList = list(range(len(data.trials))) sinfo = data.sampleinfo lenTrials = np.diff(sinfo).squeeze() if not lenTrials.shape: lenTrials = lenTrials[None] numTrials = len(trialList) # check polyremoval if polyremoval is not None: scalar_parser(polyremoval, varname="polyremoval", ntype="int_like", lims=[0, 1]) # --- Padding --- # Sliding window FFT does not support "fancy" padding if method == "mtmconvol" and isinstance(pad_to_length, str): msg = "method 'mtmconvol' only supports in-place padding for windows " +\ "exceeding trial boundaries. Your choice of `pad_to_length = '{}'` will be ignored. 
" SPYWarning(msg.format(pad_to_length)) if method == 'mtmfft': # the actual number of samples in case of later padding minSampleNum = validate_padding(pad_to_length, lenTrials) else: minSampleNum = lenTrials.min() # Compute length (in samples) of shortest trial minTrialLength = minSampleNum / data.samplerate # Shortcut to data sampling interval dt = 1 / data.samplerate foi, foilim = validate_foi(foi, foilim, data.samplerate) # see also https://docs.obspy.org/_modules/obspy/signal/detrend.html#polynomial if polyremoval is not None: try: scalar_parser(polyremoval, varname="polyremoval", lims=[0, 1], ntype="int_like") except Exception as exc: raise exc # Prepare keyword dict for logging (use `lcls` to get actually provided # keyword values, not defaults set above) log_dct = {"method": method, "output": output, "keeptapers": keeptapers, "keeptrials": keeptrials, "polyremoval": polyremoval, "pad_to_length": pad_to_length} # -------------------------------- # 1st: Check time-frequency inputs # to prepare/sanitize `toi` # -------------------------------- if method in ["mtmconvol", "wavelet", "superlet"]: # Get start/end timing info respecting potential in-place selection if toi is None: raise SPYTypeError(toi, varname="toi", expected="scalar or array-like or 'all'") if data._selection is not None: tStart = data._selection.trialdefinition[:, 2] / data.samplerate else: tStart = data._t0 / data.samplerate tEnd = tStart + lenTrials / data.samplerate # for these methods only 'all' or an equidistant array # of time points (sub-sampling, trimming) are valid if method in ["wavelet", "superlet"]: valid = True if isinstance(toi, Number): valid = False elif isinstance(toi, str): if toi != "all": valid = False else: # take everything preSelect = [slice(None)] * numTrials postSelect = [slice(None)] * numTrials elif not iter(toi): valid = False # this is the sequence type - can only be an interval! 
else: try: array_parser(toi, varname="toi", hasinf=False, hasnan=False, lims=[tStart.min(), tEnd.max()], dims=(None,)) except Exception as exc: raise exc toi = np.array(toi) # check for equidistancy if not np.allclose(np.diff(toi, 2), np.zeros(len(toi) - 2)): valid = False # trim (preSelect) and subsample output (postSelect) else: preSelect = [] postSelect = [] # get sample intervals and relative indices from toi for tk in range(numTrials): start = int(data.samplerate * (toi[0] - tStart[tk])) stop = int(data.samplerate * (toi[-1] - tStart[tk]) + 1) preSelect.append(slice(max(0, start), max(stop, stop - start))) smpIdx = np.minimum(lenTrials[tk] - 1, data.samplerate * (toi - tStart[tk]) - start) postSelect.append(smpIdx.astype(np.intp)) # get out if sth wasn't right if not valid: lgl = "array of equidistant time-points or 'all' for wavelet based methods" raise SPYValueError(legal=lgl, varname="toi", actual=toi) # Update `log_dct` w/method-specific options (use `lcls` to get actually # provided keyword values, not defaults set in here) log_dct["toi"] = lcls["toi"] # -------------------------------------------- # Check options specific to mtm*-methods # (particularly tapers and foi/freqs alignment) # -------------------------------------------- if "mtm" in method: if method == "mtmconvol": # get the sliding window size try: scalar_parser(t_ftimwin, varname="t_ftimwin", lims=[dt, minTrialLength]) except Exception as exc: SPYInfo("Please specify 't_ftimwin' parameter.. exiting!") raise exc # this is the effective sliding window FFT sample size minSampleNum = int(t_ftimwin * data.samplerate) # Construct array of maximally attainable frequencies freqs = np.fft.rfftfreq(minSampleNum, dt) # Match desired frequencies as close as possible to # actually attainable freqs # these are the frequencies attached to the SpectralData by the CR! 
if foi is not None: foi, _ = best_match(freqs, foi, squash_duplicates=True) elif foilim is not None: foi, _ = best_match(freqs, foilim, span=True, squash_duplicates=True) else: msg = (f"Automatic FFT frequency selection from {freqs[0]:.1f}Hz to " f"{freqs[-1]:.1f}Hz") SPYInfo(msg) foi = freqs log_dct["foi"] = foi # Abort if desired frequency selection is empty if foi.size == 0: lgl = "non-empty frequency specification" act = "empty frequency selection" raise SPYValueError(legal=lgl, varname="foi/foilim", actual=act) # sanitize taper selection and retrieve dpss settings taper_opt = validate_taper(taper, tapsmofrq, nTaper, keeptapers, foimax=foi.max(), samplerate=data.samplerate, nSamples=minSampleNum, output=output) # Update `log_dct` w/method-specific options log_dct["taper"] = taper # only dpss returns non-empty taper_opt dict if taper_opt: log_dct["nTaper"] = taper_opt["Kmax"] log_dct["tapsmofrq"] = tapsmofrq # ------------------------------------------------------- # Now, prepare explicit compute-classes for chosen method # ------------------------------------------------------- if method == "mtmfft": check_effective_parameters(MultiTaperFFT, defaults, lcls) # method specific parameters method_kwargs = { 'samplerate': data.samplerate, 'taper': taper, 'taper_opt': taper_opt, 'nSamples': minSampleNum } # Set up compute-class specestMethod = MultiTaperFFT( foi=foi, timeAxis=timeAxis, keeptapers=keeptapers, polyremoval=polyremoval, output_fmt=output, method_kwargs=method_kwargs) elif method == "mtmconvol": check_effective_parameters(MultiTaperFFTConvol, defaults, lcls) # Process `toi` for sliding window multi taper fft, # we have to account for three scenarios: (1) center sliding # windows on all samples in (selected) trials (2) `toi` was provided as # percentage indicating the degree of overlap b/w time-windows and (3) a set # of discrete time points was provided. 
These three cases are encoded in # `overlap, i.e., ``overlap > 1` => all, `0 < overlap < 1` => percentage, # `overlap < 0` => discrete `toi` # overlap = None if isinstance(toi, str): if toi != "all": lgl = "`toi = 'all'` to center analysis windows on all time-points" raise SPYValueError(legal=lgl, varname="toi", actual=toi) equidistant = True overlap = np.inf elif isinstance(toi, Number): try: scalar_parser(toi, varname="toi", lims=[0, 1]) except Exception as exc: raise exc overlap = toi equidistant = True # this captures all other cases, e.i. toi is of sequence type else: overlap = -1 try: array_parser(toi, varname="toi", hasinf=False, hasnan=False, lims=[tStart.min(), tEnd.max()], dims=(None,)) except Exception as exc: raise exc toi = np.array(toi) tSteps = np.diff(toi) if (tSteps < 0).any(): lgl = "ordered list/array of time-points" act = "unsorted list/array" raise SPYValueError(legal=lgl, varname="toi", actual=act) # Account for round-off errors: if toi spacing is almost at sample interval # manually correct it if np.isclose(tSteps.min(), dt): tSteps[np.isclose(tSteps, dt)] = dt if tSteps.min() < dt: msg = f"`toi` selection too fine, max. 
time resolution is {dt}s" SPYWarning(msg) # This is imho a bug in NumPy - even `arange` and `linspace` may produce # arrays that are numerically not exactly equidistant - `unique` will # show several entries here - use `allclose` to identify "even" spacings equidistant = np.allclose(tSteps, [tSteps[0]] * tSteps.size) # If `toi` was 'all' or a percentage, use entire time interval of (selected) # trials and check if those trials have *approximately* equal length if toi is None: if not np.allclose(lenTrials, [minSampleNum] * lenTrials.size): msg = "processing trials of different lengths (min = {}; max = {} samples)" +\ " with `toi = 'all'`" SPYWarning(msg.format(int(minSampleNum), int(lenTrials.max()))) # number of samples per window nperseg = int(t_ftimwin * data.samplerate) halfWin = int(nperseg / 2) postSelect = slice(None) # select all is the default if 0 <= overlap <= 1: # `toi` is percentage noverlap = min(nperseg - 1, int(overlap * nperseg)) # windows get shifted exactly 1 sample # to get a spectral estimate at each sample else: noverlap = nperseg - 1 # `toi` is array if overlap < 0: # Compute necessary padding at begin/end of trials to fit sliding windows offStart = ((toi[0] - tStart) * data.samplerate).astype(np.intp) padBegin = halfWin - offStart padBegin = ((padBegin > 0) * padBegin).astype(np.intp) offEnd = ((tEnd - toi[-1]) * data.samplerate).astype(np.intp) padEnd = halfWin - offEnd padEnd = ((padEnd > 0) * padEnd).astype(np.intp) # Compute sample-indices (one slice/list per trial) from time-selections soi = [] if equidistant: # soi just trims the input data to the [toi[0], toi[-1]] interval # postSelect then subsamples the spectral esimate to the user given toi postSelect = [] for tk in range(numTrials): start = max(0, int(round(data.samplerate * (toi[0] - tStart[tk]) - halfWin))) stop = int(round(data.samplerate * (toi[-1] - tStart[tk]) + halfWin + 1)) soi.append(slice(start, max(stop, stop - start))) # chosen toi subsampling interval in sample units, 
min. is 1; # compute `delta_idx` s.t. stop - start / delta_idx == toi.size delta_idx = int(round((soi[0].stop - soi[0].start) / toi.size)) delta_idx = delta_idx if delta_idx > 1 else 1 postSelect = slice(None, None, delta_idx) else: for tk in range(numTrials): starts = (data.samplerate * (toi - tStart[tk]) - halfWin).astype(np.intp) starts += padBegin[tk] stops = (data.samplerate * (toi - tStart[tk]) + halfWin + 1).astype(np.intp) stops += padBegin[tk] stops = np.maximum(stops, stops - starts, dtype=np.intp) soi.append([slice(start, stop) for start, stop in zip(starts, stops)]) # postSelect here remains slice(None), as resulting spectrum # has exactly one entry for each soi # `toi` is percentage or "all" else: soi = [slice(None)] * numTrials # Collect keyword args for `mtmconvol` in dictionary method_kwargs = {"samplerate": data.samplerate, "nperseg": nperseg, "noverlap": noverlap, "taper" : taper, "taper_opt" : taper_opt} # Set up compute-class specestMethod = MultiTaperFFTConvol( soi, postSelect, equidistant=equidistant, toi=toi, foi=foi, timeAxis=timeAxis, keeptapers=keeptapers, polyremoval=polyremoval, output_fmt=output, method_kwargs=method_kwargs) elif method == "wavelet": check_effective_parameters(WaveletTransform, defaults, lcls) # Check wavelet selection if wavelet not in availableWavelets: lgl = "'" + "or '".join(opt + "' " for opt in availableWavelets) raise SPYValueError(legal=lgl, varname="wavelet", actual=wavelet) if wavelet not in ["Morlet", "Paul"]: msg = "the chosen wavelet '{}' is real-valued and does not provide " +\ "any information about amplitude or phase of the data. This wavelet function " +\ "may be used to isolate peaks or discontinuities in the signal. 
" SPYWarning(msg.format(wavelet)) # Check for consistency of `width`, `order` and `wavelet` if wavelet == "Morlet": try: scalar_parser(width, varname="width", lims=[1, np.inf]) except Exception as exc: raise exc wfun = getattr(spywave, wavelet)(w0=width) else: if width != lcls["width"]: msg = "option `width` has no effect for wavelet '{}'" SPYWarning(msg.format(wavelet)) if wavelet == "Paul": try: scalar_parser(order, varname="order", lims=[4, np.inf], ntype="int_like") except Exception as exc: raise exc wfun = getattr(spywave, wavelet)(m=order) elif wavelet == "DOG": try: scalar_parser(order, varname="order", lims=[1, np.inf], ntype="int_like") except Exception as exc: raise exc wfun = getattr(spywave, wavelet)(m=order) else: if order is not None: msg = "option `order` has no effect for wavelet '{}'" SPYWarning(msg.format(wavelet)) wfun = getattr(spywave, wavelet)() # automatic frequency selection if foi is None and foilim is None: scales = get_optimal_wavelet_scales( wfun.scale_from_period, # all availableWavelets sport one! 
int(minTrialLength * data.samplerate), dt) foi = 1 / wfun.fourier_period(scales) msg = (f"Setting frequencies of interest to {foi[0]:.1f}-" f"{foi[-1]:.1f}Hz") SPYInfo(msg) else: if foilim is not None: foi = np.arange(foilim[0], foilim[1] + 1, dtype=float) # 0 frequency is not valid foi[foi < 0.01] = 0.01 scales = wfun.scale_from_period(1 / foi) # Update `log_dct` w/method-specific options (use `lcls` to get actually # provided keyword values, not defaults set in here) log_dct["foi"] = foi log_dct["wavelet"] = lcls["wavelet"] log_dct["width"] = lcls["width"] log_dct["order"] = lcls["order"] # method specific parameters method_kwargs = { 'samplerate' : data.samplerate, 'scales' : scales, 'wavelet' : wfun } # Set up compute-class specestMethod = WaveletTransform( preSelect, postSelect, toi=toi, timeAxis=timeAxis, polyremoval=polyremoval, output_fmt=output, method_kwargs=method_kwargs) elif method == "superlet": check_effective_parameters(SuperletTransform, defaults, lcls) # check and parse superlet specific arguments if order_max is None: lgl = "Positive integer needed for order_max" raise SPYValueError(legal=lgl, varname="order_max", actual=None) else: scalar_parser( order_max, varname="order_max", lims=[1, np.inf], ntype="int_like" ) scalar_parser( order_min, varname="order_min", lims=[1, order_max], ntype="int_like" ) scalar_parser(c_1, varname="c_1", lims=[1, np.inf], ntype="int_like") # if no frequencies are user selected, take a sensitive default if foi is None and foilim is None: scales = get_optimal_wavelet_scales( superlet.scale_from_period, int(minTrialLength * data.samplerate), dt) foi = 1 / superlet.fourier_period(scales) msg = (f"Setting frequencies of interest to {foi[0]:.1f}-" f"{foi[-1]:.1f}Hz") SPYInfo(msg) else: if foilim is not None: # frequency range in 1Hz steps foi = np.arange(foilim[0], foilim[1] + 1, dtype=float) # 0 frequency is not valid foi[foi < 0.01] = 0.01 scales = superlet.scale_from_period(1. 
/ foi) # FASLT needs ordered frequencies low - high # meaning the scales have to go high - low if adaptive: if len(scales) < 2: lgl = "A range of frequencies" act = "Single frequency" raise SPYValueError(legal=lgl, varname="foi", actual=act) if np.any(np.diff(scales) > 0): msg = "Sorting frequencies low to high for adaptive SLT.." SPYWarning(msg) scales = np.sort(scales)[::-1] log_dct["foi"] = foi log_dct["c_1"] = lcls["c_1"] log_dct["order_max"] = lcls["order_max"] log_dct["order_min"] = lcls["order_min"] # method specific parameters method_kwargs = { 'samplerate' : data.samplerate, 'scales' : scales, 'order_max' : order_max, 'order_min' : order_min, 'c_1' : c_1, 'adaptive' : adaptive } # Set up compute-class specestMethod = SuperletTransform( preSelect, postSelect, toi=toi, timeAxis=timeAxis, polyremoval=polyremoval, output_fmt=output, method_kwargs=method_kwargs) # ------------------------------------------------- # Sanitize output and call the ComputationalRoutine # ------------------------------------------------- # If provided, make sure output object is appropriate if out is not None: try: data_parser(out, varname="out", writable=True, empty=True, dataclass="SpectralData", dimord=SpectralData().dimord) except Exception as exc: raise exc new_out = False else: out = SpectralData(dimord=SpectralData._defaultDimord) new_out = True # Perform actual computation specestMethod.initialize(data, out._stackingDim, chan_per_worker=kwargs.get("chan_per_worker"), keeptrials=keeptrials) specestMethod.compute(data, out, parallel=kwargs.get("parallel"), log_dict=log_dct) # Either return newly created output object or simply quit return out if new_out else None
def wrapper_cfg(*args, **kwargs):
    """
    Unfold a `cfg` dictionary and Syncopy data objects, then call `func`

    The positional and keyword arguments are scanned for (at most one)
    dictionary that is interpreted as `cfg` "structure" and for Syncopy data
    objects (either positional, or via the keys/keywords `data`/`dataset`).
    The wrapped callable is finally invoked as
    ``func(*data, *posargs, **cfg)``.

    Parameters
    ----------
    *args
        Positional arguments; may contain one `cfg` dictionary and/or
        Syncopy data objects.
    **kwargs
        Keyword arguments; may contain `cfg`, `data` or `dataset`.

    Returns
    -------
    Whatever `func` returns.

    Raises
    ------
    SPYValueError
        If more than one `cfg` is found, if `cfg` is given both positionally
        and as keyword, if non-default keywords are combined with `cfg`, or
        if data objects are provided in more than one way.
    SPYTypeError
        If `cfg` is not dictionary-like.
    SPYError
        If `data`/`dataset` contains anything but Syncopy data objects.

    Notes
    -----
    `func` is not defined inside this block; presumably it is bound by an
    enclosing decorator scope (TODO confirm against the surrounding file).
    """

    # First, parse positional arguments for dict-type inputs (`k` counts the
    # no. of dicts provided) and convert tuple of positional args to list
    cfg = None
    k = 0
    args = list(args)
    for argidx, arg in enumerate(args):
        if isinstance(arg, dict):
            cfgidx = argidx
            k += 1

    # If a dict was found, assume it's a `cfg` dict and extract it from
    # the positional argument list; if more than one dict was found, abort
    if k == 1:
        cfg = args.pop(cfgidx)
    elif k > 1:
        raise SPYValueError(legal="single `cfg` input",
                            varname="cfg",
                            actual="{0:d} `cfg` objects in input arguments".format(k))

    # Now parse provided keywords for `cfg` entry - if `cfg` was already
    # provided as positional argument, abort
    if kwargs.get("cfg") is not None:
        if cfg:
            lgl = "`cfg` either as positional or keyword argument, not both"
            raise SPYValueError(legal=lgl, varname="cfg")
        cfg = kwargs.pop("cfg")

    # If `cfg` was detected either in positional or keyword arguments, process it
    if cfg:

        # If `cfg` is not dict-like, abort (`StructDict` is a `dict` child)
        if not isinstance(cfg, dict):
            raise SPYTypeError(cfg, varname="cfg", expected="dictionary-like")

        # IMPORTANT: create a copy of `cfg` using `StructDict` constructor to
        # not manipulate `cfg` in user's namespace!
        cfg = StructDict(cfg)  # FIXME

        # If a method is called using `cfg`, non-default values for
        # keyword arguments must *only* be provided via `cfg`
        # NOTE(review): keywords that pass this check are not merged into
        # `cfg`, i.e., they are not forwarded to `func` below.
        defaults = get_defaults(func)
        for key, value in kwargs.items():
            if defaults.get(key, value) != value:
                raise SPYValueError(legal="no keyword arguments",
                                    varname=key,
                                    actual="non-default value for {}".format(key))

        # Translate any existing "yes" and "no" fields to `True` and `False`
        for key in cfg.keys():
            if str(cfg[key]) == "yes":
                cfg[key] = True
            elif str(cfg[key]) == "no":
                cfg[key] = False

    # No explicit `cfg`: rename `kwargs` to `cfg` to consolidate processing below;
    # IMPORTANT: this does *not* create a copy of `kwargs`, thus the `pop`-ing
    # below actually manipulates `kwargs` as well - crucial for the `kwargs.get("data")`
    # error checking!
    else:
        cfg = kwargs

    # If `cfg` contains keys 'data' or 'dataset' extract corresponding
    # entry and make it a positional argument (abort if both 'data'
    # and 'dataset' are present)
    data = cfg.pop("data", None)
    if cfg.get("dataset"):
        if data:
            lgl = "either 'data' or 'dataset' in `cfg`/keywords, not both"
            raise SPYValueError(legal=lgl, varname="cfg")
        data = cfg.pop("dataset")

    # If `cfg` did not contain `data`, look into `kwargs`
    if data is None:
        data = kwargs.pop("data", None)
        if kwargs.get("dataset"):
            if data:
                lgl = "either `data` or `dataset` keyword, not both"
                raise SPYValueError(legal=lgl, varname="data/dataset")
            data = kwargs.pop("dataset")

    # If Syncopy data object(s) were provided convert single objects to one-element
    # lists, ensure positional args do *not* contain add'l objects; ensure keyword
    # args (besides `cfg`) do *not* contain add'l objects; ensure `data` exclusively
    # contains Syncopy data objects. Finally, rename remaining positional arguments.
    # The test is `is not None` (not plain truthiness): a falsy but provided
    # `data` (e.g., an empty list) used to skip both branches and left
    # `posargs` unbound, so the final call died with an `UnboundLocalError`.
    if data is not None:
        if not isinstance(data, (tuple, list)):
            data = [data]
        if any(isinstance(arg, spy.datatype.base_data.BaseData) for arg in args):
            lgl = "Syncopy data object(s) provided either via `cfg`/keyword or " +\
                "positional arguments, not both"
            raise SPYValueError(legal=lgl, varname="cfg/data")
        if kwargs.get("data") or kwargs.get("dataset"):
            lgl = "Syncopy data object(s) provided either via `cfg` or as " +\
                "keyword argument, not both"
            raise SPYValueError(legal=lgl, varname="cfg.data")
        if any(not isinstance(obj, spy.datatype.base_data.BaseData) for obj in data):
            raise SPYError("`data` must be Syncopy data object(s)!")
        posargs = args

    # If `data` was not provided via `cfg` or as kw-arg, parse positional arguments
    else:
        data = []
        posargs = []
        for arg in args:
            if isinstance(arg, spy.datatype.base_data.BaseData):
                data.append(arg)
            else:
                posargs.append(arg)

    # Call function with unfolded `data` + modified positional/keyword args
    return func(*data, *posargs, **cfg)
def esi_cluster_setup(partition="8GBS", n_jobs=2, mem_per_job=None,
                      timeout=180, interactive=True, start_client=True,
                      **kwargs):
    """
    Start a distributed Dask cluster of parallel processing workers using SLURM
    (or local multi-processing)

    Parameters
    ----------
    partition : str
        Name of SLURM partition/queue to use
    n_jobs : int
        Number of jobs to spawn
    mem_per_job : None or str
        Memory booking for each job. Can be specified either in megabytes
        (e.g., ``mem_per_job = "1500MB"``) or gigabytes (e.g., ``mem_per_job = "2GB"``).
        If `mem_per_job` is `None`, it is attempted to infer a sane default value
        from the chosen queue, e.g., for ``partition = "8GBS"`` `mem_per_job` is
        automatically set to the allowed maximum of `'8GB'`. However, even in
        queues with guaranteed memory bookings, it is possible to allocate less
        memory than the allowed maximum per job to spawn numerous low-memory
        jobs. Requests exceeding the limit of the chosen partition are capped
        at that limit (a warning is shown). See Examples for details.
    timeout : int
        Number of seconds to wait for requested jobs to start up.
    interactive : bool
        If `True`, user input is required in case not all jobs could
        be started in the provided waiting period (determined by `timeout`).
        If `interactive` is `False` and the jobs could not be started within
        `timeout` seconds, a `TimeoutError` is raised.
    start_client : bool
        If `True`, a distributed computing client is launched and attached to
        the workers. If `start_client` is `False`, only a distributed
        computing cluster is started to which compute-clients can connect.
    **kwargs : dict
        Additional keyword arguments can be used to control job-submission details.
        The keys read here are `workers_per_job`, `n_cores` and
        `slurmWorkingDirectory`.

    Returns
    -------
    proc : object or None
        A distributed computing client (if ``start_client = True``) or
        a distributed computing cluster (otherwise). `None` is returned if
        SLURM is not available and no local client was started, or if the
        waiting routine signals that the setup was aborted.

    Examples
    --------
    The following command launches 10 SLURM jobs with 2 gigabytes memory each
    in the `8GBS` partition

    >>> spy.esi_cluster_setup(n_jobs=10, partition="8GBS", mem_per_job="2GB")

    If you want to access properties of the created distributed computing client,
    assign an explicit return quantity, i.e.,

    >>> client = spy.esi_cluster_setup(n_jobs=10, partition="8GBS", mem_per_job="2GB")

    The underlying distributed computing cluster can be accessed using

    >>> client.cluster

    Notes
    -----
    Syncopy's parallel computing engine relies on the concurrent processing library
    `Dask <https://docs.dask.org/en/latest/>`_. Thus, the distributed computing
    clients used by Syncopy are in fact instances of :class:`dask.distributed.Client`.
    This function specifically acts as a wrapper for :class:`dask_jobqueue.SLURMCluster`.
    Users familiar with Dask in general and its distributed scheduler and cluster
    objects in particular, may leverage Dask's entire API to fine-tune parallel
    processing jobs to their liking (if wanted).

    See also
    --------
    cluster_cleanup : remove dangling parallel processing job-clusters
    """

    # For later reference: dynamically fetch name of current function
    funcName = "Syncopy <{}>".format(inspect.currentframe().f_code.co_name)

    # Be optimistic: prepare success message
    successMsg = "{name:s} Cluster dashboard accessible at {dash:s}"

    # Retrieve all partitions currently available in SLURM
    out, err = subprocess.Popen("sinfo -h -o %P",
                                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                                text=True, shell=True).communicate()
    if len(err) > 0:

        # SLURM is not installed, either allocate `LocalCluster` or just leave
        if "sinfo: not found" in err:
            if interactive:
                msg = "{name:s} SLURM does not seem to be installed on this machine " +\
                    "({host:s}). Do you want to start a local multi-processing " +\
                    "computing client instead? "
                startLocal = user_yesno(msg.format(name=funcName,
                                                   host=socket.gethostname()),
                                        default="no")
            else:
                startLocal = True
            if startLocal:
                client = Client()
                successMsg = "{name:s} Local parallel computing client ready. \n" + successMsg
                print(successMsg.format(name=funcName, dash=client.cluster.dashboard_link))
                if start_client:
                    return client
                return client.cluster
            return

        # SLURM is installed, but something's wrong
        msg = "SLURM queuing system from node {node:s}. " +\
            "Original error message below:\n{error:s}"
        raise SPYIOError(msg.format(node=socket.gethostname(), error=err))
    options = out.split()

    # Make sure we're in a valid partition (exclude IT partitions from output message)
    if partition not in options:
        valid = list(set(options).difference(["DEV", "PPC"]))
        raise SPYValueError(legal="'" + "or '".join(opt + "' " for opt in valid),
                            varname="partition", actual=partition)

    # Parse job count
    try:
        scalar_parser(n_jobs, varname="n_jobs", ntype="int_like", lims=[1, np.inf])
    except Exception as exc:
        raise exc

    # Get requested memory per job
    if mem_per_job is not None:
        if not isinstance(mem_per_job, str):
            raise SPYTypeError(mem_per_job, varname="mem_per_job", expected="string")
        if not any(szstr in mem_per_job for szstr in ["MB", "GB"]):
            lgl = "string representation of requested memory (e.g., '8GB', '12000MB')"
            raise SPYValueError(legal=lgl, varname="mem_per_job", actual=mem_per_job)

    # Query memory limit of chosen partition and ensure that `mem_per_job` is
    # set for partitions w/o limit; NOTE: `mem_lim` is measured in megabytes
    idx = partition.find("GB")
    if idx > 0:
        mem_lim = int(partition[:idx]) * 1000
    else:
        if partition == "PREPO":
            mem_lim = 16000
        else:
            if mem_per_job is None:
                lgl = "explicit memory amount as required by partition '{}'"
                raise SPYValueError(legal=lgl.format(partition),
                                    varname="mem_per_job", actual=mem_per_job)
            mem_lim = np.inf

    # Consolidate requested memory with chosen partition (or assign default memory)
    if mem_per_job is None:
        mem_per_job = str(mem_lim) + "MB"
    else:
        if "MB" in mem_per_job:
            mem_req = int(mem_per_job[:mem_per_job.find("MB")])
        else:
            mem_req = int(round(float(mem_per_job[:mem_per_job.find("GB")]) * 1000))
        if mem_req > mem_lim:
            # Both the warning and the capped value have to use megabytes,
            # since `mem_lim` is in MB (labeling it "GB" would turn the 8GB
            # partition limit into a request for "8000GB")
            msg = "`mem_per_job` exceeds limit of {lim:d}MB for partition {par:s}. " +\
                "Capping memory at partition limit. "
            SPYWarning(msg.format(lim=mem_lim, par=partition))
            mem_per_job = str(int(mem_lim)) + "MB"

    # Parse requested timeout period
    try:
        scalar_parser(timeout, varname="timeout", ntype="int_like", lims=[1, np.inf])
    except Exception as exc:
        raise exc

    # Determine if cluster allocation is happening interactively
    if not isinstance(interactive, bool):
        raise SPYTypeError(interactive, varname="interactive", expected="bool")

    # Determine if a dask client was requested
    if not isinstance(start_client, bool):
        raise SPYTypeError(start_client, varname="start_client", expected="bool")

    # Set/get "hidden" kwargs
    workers_per_job = kwargs.get("workers_per_job", 1)
    try:
        scalar_parser(workers_per_job, varname="workers_per_job",
                      ntype="int_like", lims=[1, 8])
    except Exception as exc:
        raise exc

    n_cores = kwargs.get("n_cores", 1)
    try:
        scalar_parser(n_cores, varname="n_cores",
                      ntype="int_like", lims=[1, np.inf])
    except Exception as exc:
        raise exc

    slurm_wdir = kwargs.get("slurmWorkingDirectory", None)
    if slurm_wdir is None:
        usr = getpass.getuser()
        slurm_wdir = "/mnt/hpx/slurm/{usr:s}/{usr:s}_{date:s}"
        slurm_wdir = slurm_wdir.format(usr=usr,
                                       date=datetime.now().strftime('%Y%m%d-%H%M%S'))
        os.makedirs(slurm_wdir, exist_ok=True)
    else:
        try:
            io_parser(slurm_wdir, varname="slurmWorkingDirectory", isfile=False)
        except Exception as exc:
            raise exc

    # Hotfix for upgraded cluster-nodes: point to correct Python executable if working from /home
    pyExec = sys.executable
    if sys.executable.startswith("/home"):
        pyExec = "/mnt/gs" + sys.executable

    # Create `SLURMCluster` object using provided parameters
    out_files = os.path.join(slurm_wdir, "slurm-%j.out")
    cluster = SLURMCluster(cores=n_cores,
                           memory=mem_per_job,
                           processes=workers_per_job,
                           local_directory=slurm_wdir,
                           queue=partition,
                           name="spyswarm",
                           python=pyExec,
                           header_skip=["-t", "--mem"],
                           job_extra=["--output={}".format(out_files)])
                           # interface="asdf", # interface is set via `psutil.net_if_addrs()`
                           # job_extra=["--hint=nomultithread",
                           #            "--threads-per-core=1"]

    # Compute total no. of workers and up-scale cluster accordingly
    total_workers = n_jobs * workers_per_job
    cluster.scale(total_workers)

    # Fire up waiting routine to avoid premature cluster setups; a truthy
    # return value makes this function bail out and return `None`
    if _cluster_waiter(cluster, funcName, total_workers, timeout, interactive):
        return

    # Kill a zombie cluster in non-interactive mode
    if not interactive and _count_running_workers(cluster) == 0:
        cluster.close()
        err = "SLURM jobs could not be started within given time-out " +\
            "interval of {0:d} seconds"
        raise TimeoutError(err.format(timeout))

    # Highlight how to connect to dask performance monitor
    print(successMsg.format(name=funcName, dash=cluster.dashboard_link))

    # If client was requested, return that instead of the created cluster
    if start_client:
        return Client(cluster)
    return cluster
def validate_taper(taper, tapsmofrq, nTaper, keeptapers, foimax, samplerate,
                   nSamples, output):
    """
    General taper validation and Slepian/dpss input sanitization.
    The default is to max out `nTaper` to achieve the desired frequency
    smoothing bandwidth. For details about the Slepian settings see

    "The Effective Bandwidth of a Multitaper Spectral Estimator,
    A. T. Walden, E. J. McCoy and D. B. Percival"

    Parameters
    ----------
    taper : str
        Windowing function, one of :data:`~syncopy.shared.const_def.availableTapers`
    tapsmofrq : float or None
        Taper smoothing bandwidth for `taper='dpss'`
    nTaper : int_like or None
        Number of tapers to use for multi-tapering (not recommended)

    Other Parameters
    ----------------
    keeptapers : bool
        If `False` (taper averaging) and `taper='dpss'`, only
        ``output = 'pow'`` is accepted.
    foimax : float
        Maximum frequency for the analysis (currently not used by the
        active code of this routine)
    samplerate : float
        the samplerate in Hz
    nSamples : int
        Number of samples
    output : str, one of {'abs', 'pow', 'fourier'}
        Fourier transformation output type

    Returns
    -------
    dpss_opt : dict
        For multi-tapering (`taper='dpss'`) contains the
        parameters `NW` and `Kmax` for `scipy.signal.windows.dpss`.
        For all other tapers this is an empty dictionary.

    Raises
    ------
    SPYValueError
        If `taper` is not supported, if taper averaging is combined with
        an `output` other than `'pow'`, or if `tapsmofrq` is `None` for
        `taper='dpss'`.
    """

    # See if taper choice is supported
    if taper not in availableTapers:
        lgl = "'" + "or '".join(opt + "' " for opt in availableTapers)
        raise SPYValueError(legal=lgl, varname="taper", actual=taper)

    # Warn user about DPSS only settings
    if taper != "dpss":
        if tapsmofrq is not None:
            msg = "`tapsmofrq` is only used if `taper` is `dpss`!"
            SPYWarning(msg)
        if nTaper is not None:
            msg = "`nTaper` is only used if `taper` is `dpss`!"
            SPYWarning(msg)
        if keeptapers:
            msg = "`keeptapers` is only used if `taper` is `dpss`!"
            SPYWarning(msg)

        # empty dpss_opt, only Slepians have options
        return {}

    # --- from here on we are working w/Slepian tapers (`taper == "dpss"`) ---

    # direct mtm estimate (averaging) only valid for spectral power
    if not keeptapers and output != "pow":
        lgl = "'pow', the only valid option for taper averaging"
        raise SPYValueError(legal=lgl, varname="output", actual=output)

    # we now enforce a user submitted smoothing bw
    # (a former idea was to derive a default from 3/4 octave smoothing of the
    # highest `foi` following Hill et al. "Oscillatory Synchronization in
    # Large-Scale Cortical Networks Predicts Perception", Neuron, 2011 - this
    # produced excessively large bandwidths and is not active)
    if tapsmofrq is None:
        lgl = "smoothing bandwidth in Hz, typical values are in the range 1-10Hz"
        raise SPYValueError(legal=lgl, varname="tapsmofrq", actual=tapsmofrq)

    # --- minimal smoothing bandwidth ---
    # --- such that Kmax/nTaper is at least 1
    minBw = 2 * samplerate / nSamples
    # -----------------------------------

    # user set tapsmofrq directly
    try:
        scalar_parser(tapsmofrq, varname="tapsmofrq", lims=[0, np.inf])
    except Exception as exc:
        raise exc

    if tapsmofrq < minBw:
        msg = f'Setting tapsmofrq to the minimal attainable bandwidth of {minBw:.2f}Hz'
        SPYInfo(msg)
        tapsmofrq = minBw

    # --------------------------------------------
    # set parameters for scipy.signal.windows.dpss
    NW = tapsmofrq * nSamples / (2 * samplerate)
    # from the minBw setting NW is meant to be at least 1; however, round-off
    # errors can yield values marginally below 1 (e.g., `1 / 49 * 49` evaluates
    # to 0.9999999999999999), which `int` would truncate to zero tapers
    Kmax = max(1, int(2 * NW - 1))  # optimal number of tapers
    # --------------------------------------------

    # the recommended way:
    # set nTaper automatically to achieve exact effective smoothing bandwidth
    if nTaper is None:
        msg = f'Using {Kmax} taper(s) for multi-tapering'
        SPYInfo(msg)
        return {'NW': NW, 'Kmax': Kmax}

    # the user insists on a specific number of tapers
    try:
        scalar_parser(nTaper,
                      varname="nTaper",
                      ntype="int_like", lims=[1, np.inf])
    except Exception as exc:
        raise exc

    if nTaper != Kmax:
        msg = f''' Manually setting the number of tapers is not recommended and may (strongly) distort the effective smoothing bandwidth!\n The optimal number of tapers is {Kmax}, you have chosen to use {nTaper}. '''
        SPYWarning(msg)

    return {'NW': NW, 'Kmax': nTaper}
def initialize(self, data, chan_per_worker=None, keeptrials=True):
    """
    Perform dry-run of calculation to determine output shape

    Parameters
    ----------
    data : syncopy data object
        Syncopy data object to be processed (has to be the same object
        that is passed to :meth:`compute` for the actual calculation).
    chan_per_worker : None or int
        Number of channels to be processed by each worker (only relevant in
        case of concurrent processing). If `chan_per_worker` is `None` (default)
        by-trial parallelism is used, i.e., each worker processes
        data corresponding to a full trial. If `chan_per_worker > 0`, trials are
        split into channel-groups of size `chan_per_worker` (+ rest if the
        number of channels is not divisible by `chan_per_worker` without
        remainder) and workers are assigned by-trial channel-groups for
        processing. The setting is silently reset to `None` (after a warning)
        if `data` has no `channel` dimension, if `keeptrials` is `False` or
        if an active in-place selection restricts channels.
    keeptrials : bool
        Flag indicating whether to return individual trials or average

    Returns
    -------
    Nothing : None

    Raises
    ------
    SPYValueError
        If a positional argument in `self.argv` exceeds `self._maxArgSize`,
        if a list/tuple argument does not have one entry per trial, if the
        dry-runs report differing output dtypes, or if not exactly one output
        dimension scales with the channel count (channel-block mode only).
    NotImplementedError
        If trials of unequal length are to be averaged (``keeptrials = False``).

    Notes
    -----
    This class method **has** to be called prior to performing the actual
    computation realized in :meth:`computeFunction`.

    The following attributes are set: `keeptrials`, `trialList`,
    `useFancyIdx`, `outputShape`, `dtype`, `sourceLayout`, `sourceSelectors`,
    `targetLayout`, `targetShapes`, `ArgV`, `chunkMem` and `dataMode`.
    Additionally, the elements of `self.argv` are expanded to per-trial lists
    and `self.cfg["chunkShape"]` is populated.

    See also
    --------
    compute : core routine performing the actual computation
    """

    # First store `keeptrial` keyword value (important for output shapes below)
    self.keeptrials = keeptrials

    # Determine if data-selection was provided; if so, extract trials and check
    # whether selection requires fancy array indexing
    if data._selection is not None:
        self.trialList = data._selection.trials
        self.useFancyIdx = data._selection._useFancy
    else:
        self.trialList = list(range(len(data.trials)))
        self.useFancyIdx = False
    numTrials = len(self.trialList)

    # If lists/tuples are in positional arguments, ensure `len == numTrials`
    # Scalars are duplicated to fit trials, e.g., ``self.argv = [3, [0, 1, 1]]``
    # then ``argv = [[3, 3, 3], [0, 1, 1]]``
    for ak, arg in enumerate(self.argv):

        # Ensure arguments are within reasonable size for distribution across workers
        # (protect against circular object references by imposing max. calls)
        # NOTE(review): `_callCount` is reset here and presumably incremented
        # inside `_sizeof` - confirm against its implementation
        self._callCount = 0
        argsize = self._sizeof(arg)
        if argsize > self._maxArgSize:
            lgl = "positional arguments less than 100 MB each"
            act = "positional argument with memory footprint of {0:4.2f} MB"
            raise SPYValueError(legal=lgl, varname="argv", actual=act.format(argsize))

        # Lists/tuples are taken as-is (one element per trial), hence `continue`
        if isinstance(arg, (list, tuple)):
            if len(arg) != numTrials:
                lgl = "list/tuple of positional arguments for each trial"
                act = "length of list/tuple does not correspond to number of trials"
                raise SPYValueError(legal=lgl, varname="argv", actual=act)
            continue
        elif isinstance(arg, np.ndarray):
            if arg.size == numTrials:
                msg = "found NumPy array with size == #Trials. " +\
                    "Regardless, every worker will receive an identical copy " +\
                    "of this array. To propagate elements across workers, use " +\
                    "a list or tuple instead!"
                SPYWarning(msg)

        # Anything that is not a list/tuple (incl. arrays) is replicated per trial
        self.argv[ak] = [arg] * numTrials

    # Prepare dryrun arguments and determine geometry of trials in output
    dryRunKwargs = copy(self.cfg)
    dryRunKwargs["noCompute"] = True
    chk_list = []
    dtp_list = []
    trials = []
    for tk, trialno in enumerate(self.trialList):
        trial = data._preview_trial(trialno)
        trlArg = tuple(arg[tk] for arg in self.argv)
        # With `noCompute = True` the return value is unpacked as (shape, dtype)
        chunkShape, dtype = self.computeFunction(trial,
                                                 *trlArg,
                                                 **dryRunKwargs)
        chk_list.append(list(chunkShape))
        dtp_list.append(dtype)
        trials.append(trial)

    # The aggregate shape is computed as max across all chunks
    chk_arr = np.array(chk_list)
    if np.unique(chk_arr[:, 0]).size > 1 and not self.keeptrials:
        err = "Averaging trials of unequal lengths in output currently not supported!"
        raise NotImplementedError(err)
    if np.any([dtp_list[0] != dtp for dtp in dtp_list]):
        lgl = "unique output dtype"
        act = "{} different output dtypes".format(np.unique(dtp_list).size)
        raise SPYValueError(legal=lgl, varname="dtype", actual=act)
    chunkShape = tuple(chk_arr.max(axis=0))
    # Per-trial chunks are stacked along the first axis: sum their extents
    self.outputShape = (chk_arr[:, 0].sum(),) + chunkShape[1:]
    self.cfg["chunkShape"] = chunkShape
    self.dtype = np.dtype(dtp_list[0])

    # Ensure channel parallelization can be done at all
    if chan_per_worker is not None and "channel" not in data.dimord:
        msg = "input object does not contain `channel` dimension for parallelization!"
        SPYWarning(msg)
        chan_per_worker = None
    if chan_per_worker is not None and self.keeptrials is False:
        msg = "trial-averaging does not support channel-block parallelization!"
        SPYWarning(msg)
        chan_per_worker = None
    if data._selection is not None:
        if chan_per_worker is not None and data._selection.channel != slice(None, None, 1):
            msg = "channel selection and simultaneous channel-block " +\
                  "parallelization not yet supported!"
            SPYWarning(msg)
            chan_per_worker = None

    # Allocate control variables
    trial = trials[0]
    trlArg0 = tuple(arg[0] for arg in self.argv)
    chunkShape0 = chk_arr[0, :]
    lyt = [slice(0, stop) for stop in chunkShape0]
    sourceLayout = []
    targetLayout = []
    targetShapes = []
    ArgV = []

    # If parallelization across channels is requested the first trial is
    # split up into several chunks that need to be processed/allocated
    if chan_per_worker is not None:

        # Set up channel-chunking: full blocks of `chan_per_worker` channels
        # plus one trailing block holding the remainder (if any)
        nChannels = data.channel.size
        rem = int(nChannels % chan_per_worker)
        n_blocks = [chan_per_worker] * int(nChannels//chan_per_worker) + [rem] * int(rem > 0)
        inchanidx = data.dimord.index("channel")

        # Perform dry-run w/first channel-block of first trial to identify
        # changes in output shape w.r.t. full-trial output (`chunkShape`)
        # NOTE: `trial.shape`/`trial.idx` are overwritten in place here
        shp = list(trial.shape)
        idx = list(trial.idx)
        shp[inchanidx] = n_blocks[0]
        idx[inchanidx] = slice(0, n_blocks[0])
        trial.shape = tuple(shp)
        trial.idx = tuple(idx)
        res, _ = self.computeFunction(trial, *trlArg0, **dryRunKwargs)
        # Output dimensions whose size does not show up in the full-trial
        # shape are taken to be the ones scaling with the channel count
        outchan = [dim for dim in res if dim not in chunkShape0]
        if len(outchan) != 1:
            lgl = "exactly one output dimension to scale w/channel count"
            act = "{0:d} dimensions affected by varying channel count".format(len(outchan))
            raise SPYValueError(legal=lgl, varname="chan_per_worker", actual=act)
        outchanidx = res.index(outchan[0])

        # Get output chunks and grid indices for first trial
        chanstack = 0       # running offset along output channel axis
        blockstack = 0      # running offset along input channel axis
        for block in n_blocks:
            shp = list(trial.shape)
            idx = list(trial.idx)
            shp[inchanidx] = block
            idx[inchanidx] = slice(blockstack, blockstack + block)
            trial.shape = tuple(shp)
            trial.idx = tuple(idx)
            res, _ = self.computeFunction(trial, *trlArg0, **dryRunKwargs)
            lyt[outchanidx] = slice(chanstack, chanstack + res[outchanidx])
            targetLayout.append(tuple(lyt))
            targetShapes.append(tuple([slc.stop - slc.start for slc in lyt]))
            sourceLayout.append(trial.idx)
            ArgV.append(trlArg0)
            chanstack += res[outchanidx]
            blockstack += block

    # Simple: consume all channels simultaneously, i.e., just take the entire trial
    else:
        targetLayout.append(tuple(lyt))
        targetShapes.append(chunkShape0)
        sourceLayout.append(trial.idx)
        ArgV.append(trlArg0)

    # Construct dimensional layout of output; `stacking` tracks the current
    # offset along the first output axis (initialized w/extent of first trial)
    stacking = targetLayout[0][0].stop
    for tk in range(1, len(self.trialList)):
        trial = trials[tk]
        trlArg = tuple(arg[tk] for arg in self.argv)
        chkshp = chk_list[tk]
        lyt = [slice(0, stop) for stop in chkshp]
        lyt[0] = slice(stacking, stacking + chkshp[0])
        stacking += chkshp[0]
        if chan_per_worker is None:
            targetLayout.append(tuple(lyt))
            targetShapes.append(tuple([slc.stop - slc.start for slc in lyt]))
            sourceLayout.append(trial.idx)
            ArgV.append(trlArg)
        else:
            # Same channel-block bookkeeping as for the first trial above
            chanstack = 0
            blockstack = 0
            for block in n_blocks:
                shp = list(trial.shape)
                idx = list(trial.idx)
                shp[inchanidx] = block
                idx[inchanidx] = slice(blockstack, blockstack + block)
                trial.shape = tuple(shp)
                trial.idx = tuple(idx)
                res, _ = self.computeFunction(trial, *trlArg, **dryRunKwargs) # FauxTrial
                lyt[outchanidx] = slice(chanstack, chanstack + res[outchanidx])
                targetLayout.append(tuple(lyt))
                targetShapes.append(tuple([slc.stop - slc.start for slc in lyt]))
                sourceLayout.append(trial.idx)
                chanstack += res[outchanidx]
                blockstack += block
                ArgV.append(trlArg)

    # If the determined source layout contains unordered lists and/or index
    # repetitions, set `self.useFancyIdx` to `True` and prepare a separate
    # `sourceSelectors` list that is used in addition to `sourceLayout` for
    # data extraction.
    # In this case `sourceLayout` uses ABSOLUTE indices (indices wrt to size
    # of ENTIRE DATASET) that are SORTED W/O REPS to extract a NumPy array
    # of appropriate size from HDF5.
    # Then `sourceSelectors` uses RELATIVE indices (indices wrt to size of CURRENT
    # TRIAL) that can be UNSORTED W/REPS to actually perform the requested
    # selection on the NumPy array extracted w/`sourceLayout`.
    for grd in sourceLayout:
        if any([np.diff(sel).min() <= 0 if isinstance(sel, list)
                and len(sel) > 1 else False for sel in grd]):
            self.useFancyIdx = True
            break
    if self.useFancyIdx:
        sourceSelectors = []
        for gk, grd in enumerate(sourceLayout):
            ingrid = list(grd)
            sigrid = []
            for sk, sel in enumerate(grd):
                if isinstance(sel, list):
                    selarr = np.array(sel, dtype=np.intp)
                else: # sel is a slice
                    step = sel.step
                    if sel.step is None:
                        step = 1
                    selarr = np.array(list(range(sel.start, sel.stop, step)), dtype=np.intp)
                if selarr.size > 0:
                    # Relative selector (shifted to start at zero) + enclosing
                    # contiguous slice that covers all requested indices
                    sigrid.append(np.array(selarr) - selarr.min())
                    ingrid[sk] = slice(selarr.min(), selarr.max() + 1, 1)
                else:
                    sigrid.append([])
                    ingrid[sk] = []
            sourceSelectors.append(tuple(sigrid))
            sourceLayout[gk] = tuple(ingrid)
    else:
        sourceSelectors = [Ellipsis] * len(sourceLayout)

    # Store determined shapes and grid layout
    self.sourceLayout = sourceLayout
    self.sourceSelectors = sourceSelectors
    self.targetLayout = targetLayout
    self.targetShapes = targetShapes
    self.ArgV = ArgV

    # Compute max. memory footprint of chunks (in bytes, via dtype item size)
    if chan_per_worker is None:
        self.chunkMem = np.prod(self.cfg["chunkShape"]) * self.dtype.itemsize
    else:
        self.chunkMem = max([np.prod(shp) for shp in self.targetShapes]) * self.dtype.itemsize

    # Get data access mode (only relevant for parallel reading access)
    self.dataMode = data.mode
def validate_foi(foi, foilim, samplerate):
    """
    Validate a frequency selection given either as `foi` or as `foilim`

    Parameters
    ----------
    foi : 'all' or array like or None
        Frequencies of interest. The string `'all'` is treated like `None`.
        Anything that is not a string is checked with `array_parser`
        (1D, no NaN/Inf, values within `[0, samplerate / 2]`) and converted
        to a float :class:`numpy.ndarray`.
    foilim : 'all' or 2-element sequence or None
        Frequency limits `[fmin, fmax]`. The string `'all'` is treated like
        `None`. Anything that is not a string is checked with `array_parser`
        (exactly two elements, no NaN/Inf, values within
        `[0, samplerate / 2]`). If `fmin > fmax` the limits are sorted in
        ascending order and an info message is issued.

    Other Parameters
    ----------------
    samplerate : float
        The samplerate in Hz. Only used (as `samplerate / 2`) if an
        array-like `foi` or `foilim` has to be checked.

    Returns
    -------
    foi, foilim : tuple
        Either both are `None` or the user submitted one is parsed and returned

    Raises
    ------
    SPYValueError
        If both `foi` and `foilim` are provided or if a string other than
        `'all'` is used for either of them. Errors raised by `array_parser`
        are propagated unchanged.

    Notes
    -----
    Setting both `foi` and `foilim` to `None` is valid, the subsequent
    analysis methods should all have a default way to select a standard
    set of frequencies (e.g. np.fft.fftfreq).
    """

    # The two selection mechanisms are mutually exclusive
    if foi is not None and foilim is not None:
        lgl = "either `foi` or `foilim` specification"
        act = "both"
        raise SPYValueError(legal=lgl, varname="foi/foilim", actual=act)

    if foi is not None:
        if isinstance(foi, str):
            if foi != "all":
                raise SPYValueError(legal="'all' or `None` or list/array",
                                    varname="foi", actual=foi)
            foi = None
        else:
            # Parser errors propagate as-is, no need to catch and re-raise
            array_parser(foi, varname="foi", hasinf=False, hasnan=False,
                         lims=[0, samplerate / 2], dims=(None, ))
            foi = np.array(foi, dtype="float")

    if foilim is not None:
        if isinstance(foilim, str):
            if foilim != "all":
                raise SPYValueError(legal="'all' or `None` or `[fmin, fmax]`",
                                    varname="foilim", actual=foilim)
            foilim = None
        else:
            # Parser errors propagate as-is, no need to catch and re-raise
            array_parser(foilim, varname="foilim", hasinf=False, hasnan=False,
                         lims=[0, samplerate / 2], dims=(2, ))

            # foilim is of shape (2,)
            if foilim[0] > foilim[1]:
                msg = "Sorting foilim low to high.."
                SPYInfo(msg)
                foilim = np.sort(foilim)

    return foi, foilim
def singlepanelplot(self, trials="all", channels="all", toilim=None, avg_channels=True,
                    title=None, grid=None, fig=None, **kwargs):
    """
    Plot contents of :class:`~syncopy.AnalogData` objects using single-panel figure(s)

    Please refer to :func:`syncopy.singlepanelplot` for detailed usage information.

    Parameters
    ----------
    trials, channels, toilim
        Selection specifiers; forwarded unchanged to `_prep_analog_plots`
        (see :func:`syncopy.singlepanelplot` for their meaning).
    avg_channels : bool
        If `True`, the selected channels are averaged into a single trace.
        Otherwise every selected channel is drawn as its own trace, shifted
        vertically by the offsets stored in `fig.chanOffsets`.
    title : str or None
        Figure title. If `None`, a title is generated from the channel/trial
        counts (or from the number of overlaid datasets).
    grid : bool or None
        Passed to `_setup_figure` when a new figure is created; for
        multi-channel plots additionally passed to `ax.grid` if not `None`.
    fig : figure or None
        Existing single-panel figure to draw on top of. If `None`, a new
        figure is created via `_setup_figure`.
    **kwargs
        Forwarded unchanged to `_prep_analog_plots`.

    Returns
    -------
    fig
        The figure that was drawn into; its `objCount` attribute is
        incremented by one.

    Raises
    ------
    SPYValueError
        If `fig` carries a `multipanelplot` attribute, if channel averaging
        is requested for a figure that already holds a multi-channel plot,
        or if the number of selected channels does not match the number of
        channels already present in `fig`.

    Notes
    -----
    If no trials are selected (`nTrials == 0`) the entire data time-course
    is plotted against sample numbers, otherwise the trial-average is plotted
    against time. On successful completion `self._selection` is reset to `None`.

    Examples
    --------
    Use :func:`~syncopy.tests.misc.generate_artificial_data` to create two synthetic
    :class:`~syncopy.AnalogData` objects.

    >>> from syncopy.tests.misc import generate_artificial_data
    >>> adata = generate_artificial_data(nTrials=10, nChannels=32)
    >>> bdata = generate_artificial_data(nTrials=5, nChannels=16)

    Plot an average of the first 16 channels, averaged across trials 2, 4, and 6:

    >>> fig = spy.singlepanelplot(adata, channels=range(16), trials=[2, 4, 6])

    Overlay average of latter half of channels, averaged across trials 1, 3, 5:

    >>> fig = spy.singlepanelplot(adata, channels=range(16,32), trials=[1, 3, 5], fig=fig)

    Do not average channels:

    >>> fig = spy.singlepanelplot(adata, channels=range(16,32), trials=[1, 3, 5], avg_channels=False)

    Plot `adata` and `bdata` simultaneously in two separate figures:

    >>> fig1, fig2 = spy.singlepanelplot(adata, bdata, overlay=False)

    Overlay `adata` and `bdata`; use channel and trial selections that are valid
    for both datasets:

    >>> fig3 = spy.singlepanelplot(adata, bdata, channels=range(16), trials=[1, 2, 3])

    See also
    --------
    syncopy.singlepanelplot : visualize Syncopy data objects using single-panel plots
    """

    # Collect input arguments in dict `inputArgs` and process them; `locals()`
    # has to be the very first statement so that only call arguments are captured
    inputArgs = locals()
    inputArgs.pop("self")
    dimArrs, dimCounts, idx, timeIdx, chanIdx = _prep_analog_plots(self, "singlepanelplot",
                                                                   **inputArgs)
    (nTrials, nChan) = dimCounts
    (trList, chArr) = dimArrs

    # If we're overlaying a multi-channel plot, ensure settings match up; also,
    # do not try to overlay on top of multi-panel plots
    if hasattr(fig, "multipanelplot"):
        lgl = "single-panel figure generated by `singlepanelplot`"
        act = "multi-panel figure generated by `multipanelplot`"
        raise SPYValueError(legal=lgl, varname="fig", actual=act)
    if hasattr(fig, "chanOffsets"):
        if avg_channels:
            lgl = "multi-channel plot"
            act = "channel averaging was requested for multi-channel plot overlay"
            raise SPYValueError(legal=lgl, varname="channels/avg_channels", actual=act)
        if nChan != len(fig.chanOffsets):
            lgl = "channel-count matching existing multi-channel panels in figure"
            act = "{} channels per panel but {} channels for plotting".format(
                len(fig.chanOffsets), nChan)
            raise SPYValueError(legal=lgl, varname="channels/channels per panel", actual=act)

    # Ensure provided timing selection can actually be averaged (leverage
    # the fact that `toilim` selections exclusively generate slices)
    if nTrials > 0:
        tLengths = _prep_toilim_avg(self)

    # Generic titles for figures
    overlayTitle = "Overlay of {} datasets"

    # Every dataset in a multi-channel plot contributes `nChan` identically
    # labeled lines: keep one legend entry per dataset (guard against a zero step)
    legendStride = max(nChan, 1)

    # Either create new figure or fetch existing
    if fig is None:
        if nTrials > 0:
            xLabel = "Time [s]"
        else:
            xLabel = "Samples"
        fig, ax = _setup_figure(1, xLabel=xLabel, grid=grid)
        fig.analogPlot = True
    else:
        ax, = fig.get_axes()

    # Single-channel panel
    if avg_channels:

        # Set up pieces of generic figure titles
        if nChan > 1:
            chanTitle = "Average of {} channels".format(nChan)
        else:
            chanTitle = chArr[0]

        # Plot entire timecourse
        if nTrials == 0:

            # Do not fetch entire dataset at once, but channel by channel
            chanSec = np.arange(self.channel.size)[self._selection.channel]
            pltArr = np.zeros((self.data.shape[timeIdx], ), dtype=self.data.dtype)
            for chan in chanSec:
                idx[chanIdx] = chan
                pltArr += self.data[tuple(idx)].squeeze()
            pltArr /= nChan

            # The actual plotting command; the label is required for the trace
            # to show up in the legend (and dataset count) of overlay plots
            ax.plot(pltArr, label=os.path.basename(self.filename))

            # Set plot title depending on dataset overlay
            if fig.objCount == 0:
                if title is None:
                    title = chanTitle
                ax.set_title(title, size=pltConfig["singleTitleSize"])
            else:
                handles, labels = ax.get_legend_handles_labels()
                ax.legend(handles, labels)
                if title is None:
                    title = overlayTitle.format(len(handles))
                ax.set_title(title, size=pltConfig["singleTitleSize"])

        # Average across trials
        else:

            # Compute channel-/trial-average time-course: 2D array with slice/list
            # selection does not require fancy indexing - no need to check this here
            pltArr = np.zeros((tLengths[0], ), dtype=self.data.dtype)
            for k, trlno in enumerate(trList):
                idx[timeIdx] = self._selection.time[k]
                pltArr += self._get_trial(trlno)[tuple(idx)].mean(axis=chanIdx).squeeze()
            pltArr /= nTrials

            # The actual plotting command is literally one line...
            time = self.time[trList[0]][self._selection.time[0]]
            ax.plot(time, pltArr, label=os.path.basename(self.filename))
            ax.set_xlim([time[0], time[-1]])

            # Set plot title depending on dataset overlay
            if fig.objCount == 0:
                if title is None:
                    if nTrials > 1:
                        trTitle = "{0}across {1} trials".format(
                            "averaged " if nChan == 1 else "", nTrials)
                    else:
                        trTitle = "Trial #{}".format(trList[0])
                    title = "{}, {}".format(chanTitle, trTitle)
                ax.set_title(title, size=pltConfig["singleTitleSize"])
            else:
                handles, labels = ax.get_legend_handles_labels()
                ax.legend(handles, labels)
                if title is None:
                    title = overlayTitle.format(len(handles))
                ax.set_title(title, size=pltConfig["singleTitleSize"])

    # Multi-channel panel
    else:

        # "Raw" data, do not respect any trials
        if nTrials == 0:

            # Channel indices are needed for plotting regardless of whether
            # offsets have to be computed (new figure) or are re-used (overlay)
            chanSec = np.arange(self.channel.size)[self._selection.channel]

            # If required, compute max amplitude across provided channels
            if not hasattr(fig, "chanOffsets"):
                maxAmps = np.zeros((nChan, ), dtype=self.data.dtype)
                tickOffsets = maxAmps.copy()
                for k, chan in enumerate(chanSec):
                    idx[chanIdx] = chan
                    pltArr = np.abs(self.data[tuple(idx)].squeeze())
                    maxAmps[k] = pltArr.max()
                    tickOffsets[k] = pltArr.mean()
                fig.chanOffsets = np.cumsum([0] + [maxAmps.max()] * (nChan - 1))
                fig.tickOffsets = fig.chanOffsets + tickOffsets.mean()

            # Do not plot all at once but cycle through channels to not overflow memory
            for k, chan in enumerate(chanSec):
                idx[chanIdx] = chan
                ax.plot(self.data[tuple(idx)].squeeze() + fig.chanOffsets[k],
                        color=plt.rcParams["axes.prop_cycle"].by_key()["color"][fig.objCount],
                        label=os.path.basename(self.filename))
            if grid is not None:
                ax.grid(grid)

            # Set plot title depending on dataset overlay
            if fig.objCount == 0:
                if title is None:
                    if nChan > 1:
                        title = "Entire Data Timecourse of {} channels".format(nChan)
                    else:
                        title = "Entire Data Timecourse of {}".format(chArr[0])
                ax.set_yticks(fig.tickOffsets)
                ax.set_yticklabels(chArr)
                ax.set_title(title, size=pltConfig["singleTitleSize"])
            else:
                handles, labels = ax.get_legend_handles_labels()
                handles, labels = handles[::legendStride], labels[::legendStride]
                ax.legend(handles, labels)
                if title is None:
                    title = overlayTitle.format(len(handles))
                ax.set_title(title, size=pltConfig["singleTitleSize"])

        # Average across trial(s)
        else:

            # Compute trial-average
            pltArr = np.zeros((tLengths[0], nChan), dtype=self.data.dtype)
            for k, trlno in enumerate(trList):
                idx[timeIdx] = self._selection.time[k]
                pltArr += np.swapaxes(self._get_trial(trlno)[tuple(idx)], timeIdx, 0)
            pltArr /= nTrials

            # If required, compute offsets for multi-channel plot
            if not hasattr(fig, "chanOffsets"):
                fig.chanOffsets = np.cumsum([0] + [np.abs(pltArr).max()] * (nChan - 1))
                fig.tickOffsets = fig.chanOffsets + np.abs(pltArr).mean()

            # Plot the entire trial-averaged array at once
            time = self.time[trList[0]][self._selection.time[0]]
            ax.plot(time,
                    (pltArr + fig.chanOffsets.reshape(1, nChan)).reshape(time.size, nChan),
                    color=plt.rcParams["axes.prop_cycle"].by_key()["color"][fig.objCount],
                    label=os.path.basename(self.filename))
            if grid is not None:
                ax.grid(grid)

            # Set plot title depending on dataset overlay
            if fig.objCount == 0:
                if title is None:
                    title = "{0} channels {1}across {2} trials".format(
                        nChan, "averaged " if nTrials > 1 else "", nTrials)
                ax.set_title(title, size=pltConfig["singleTitleSize"])
            else:
                handles, labels = ax.get_legend_handles_labels()
                handles, labels = handles[::legendStride], labels[::legendStride]
                ax.legend(handles, labels)
                if title is None:
                    title = overlayTitle.format(len(handles))
                ax.set_title(title, size=pltConfig["singleTitleSize"])

    # Increment overlay-counter and draw figure
    fig.objCount += 1
    plt.draw()
    self._selection = None
    return fig