    def test_invalid_ext(self):
        # wrong extension
        with pytest.raises(SPYValueError):
            filename_parser("test.wrongExtension")
        # no extension
        with pytest.raises(SPYValueError):
            filename_parser("test")
    def test_invalid_spy_container(self):
        fname = "sessionName/sessionName_testTag.analog"
        with pytest.raises(SPYValueError):
            filename_parser(fname, is_in_valid_container=True)

        fname = "wrongContainer.spy/sessionName_testTag.analog"
        with pytest.raises(SPYValueError):
            filename_parser(fname, is_in_valid_container=True)
    def test_folder_only(self):
        assert filename_parser("container.spy") == {
            'filename': None,
            'container': 'container.spy',
            'folder': os.getcwd(),
            'tag': None,
            'basename': 'container',
            'extension': '.spy'
        }
        folder = "{}/tmp".format("C:" if platform.system() == "Windows" else "")
        assert filename_parser("/tmp/container.spy") == {
            'filename': None,
            'container': 'container.spy',
            'folder': os.path.normpath(folder),
            'tag': None,
            'basename': 'container',
            'extension': '.spy'
        }
    def test_valid_spy_container(self):
        fname = "sessionName.spy/sessionName_testTag.analog"
        assert filename_parser(fname, is_in_valid_container=True) == {
            "filename": "sessionName_testTag.analog",
            "container": "sessionName.spy",
            "folder": os.path.join(os.getcwd(), "sessionName.spy"),
            "tag": "testTag",
            "basename": "sessionName",
            "extension": ".analog"
        }
    def test_with_info_ext(self):
        fname = "sessionName_testTag.analog.info"
        assert filename_parser(fname) == {
            "filename": fname.replace(".info", ""),
            "container": None,
            "folder": os.getcwd(),
            "tag": None,
            "basename": "sessionName_testTag",
            "extension": ".analog"
        }
    def test_fname_only(self):
        fname = "sessionName_testTag.analog"
        assert filename_parser(fname) == {
            "filename": fname,
            "container": None,
            "folder": os.getcwd(),
            "tag": None,
            "basename": "sessionName_testTag",
            "extension": ".analog"
        }
    def test_with_full_path(self):
        fname = os.path.normpath(
            "/tmp/sessionName.spy/sessionName_testTag.analog")
        folder = "{}/tmp".format("C:" if platform.system() == "Windows" else "")
        assert filename_parser(fname, is_in_valid_container=True) == {
            "filename": "sessionName_testTag.analog",
            "container": "sessionName.spy",
            "folder": os.path.join(os.path.normpath(folder), "sessionName.spy"),
            "tag": "testTag",
            "basename": "sessionName",
            "extension": ".analog"
        }
    def test_none(self):
        assert all([value is None for value in filename_parser(None).values()])
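    # Illustrative addition (not part of the original test suite): a minimal
    # sketch showing that the pieces returned by `filename_parser` recombine
    # into the absolute data-file path, mirroring how `save` below joins
    # "folder" and "filename". It relies only on behavior already asserted
    # in test_valid_spy_container above.
    def test_recombine_parsed_path(self):
        fname = "sessionName.spy/sessionName_testTag.analog"
        info = filename_parser(fname, is_in_valid_container=True)
        # "folder" already contains the container directory (see
        # test_valid_spy_container), so joining it with "filename"
        # yields the full path to the HDF5 data file
        assert os.path.join(info["folder"], info["filename"]) == \
            os.path.join(os.getcwd(), "sessionName.spy",
                         "sessionName_testTag.analog")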
def save(out, container=None, tag=None, filename=None, overwrite=False, memuse=100):
    r"""Save Syncopy data object to disk

    The underlying array data object is stored in an HDF5 file, the metadata
    in a JSON file. Both can be placed inside a Syncopy container, which is a
    regular directory with the extension '.spy'.

    Parameters
    ----------
    out : Syncopy data object
        Object to be stored on disk.
    container : str
        Path to Syncopy container folder (\*.spy) to be used for saving. If
        the extension '.spy' is omitted, it will be added to the folder name.
    tag : str
        Tag to be appended to container basename
    filename : str
        Explicit path to data file. This is only necessary if the data should
        not be part of a container folder. An extension (\*.<dataclass>) is
        added if omitted. The `tag` argument is ignored.
    overwrite : bool
        If `True`, an existing HDF5 file and its accompanying JSON file are
        overwritten (without prompt).
    memuse : scalar
        Approximate in-memory cache size (in MB) for writing data to disk
        (only relevant for :class:`syncopy.VirtualData` or memory map data
        sources)

    Returns
    -------
    Nothing : None

    Notes
    -----
    Syncopy objects may also be saved using the class method ``.save`` that
    acts as a wrapper for :func:`syncopy.save`, e.g.,

    >>> save(obj, container="new_spy_container")

    is equivalent to

    >>> obj.save(container="new_spy_container")

    However, once a Syncopy object has been saved, the class method ``.save``
    can be used as a shortcut to quick-save recent changes, e.g.,

    >>> obj.save()

    writes the current state of `obj` to the data/meta-data files on disk
    associated with `obj` (overwriting both in the process). Similarly,

    >>> obj.save(tag='newtag')

    saves `obj` in the current container 'new_spy_container' under a different
    tag.

    Examples
    --------
    Save the Syncopy data object `obj` on disk in the current working
    directory without creating a spy-container

    >>> spy.save(obj, filename="session1")
    >>> # --> os.getcwd()/session1.<dataclass>
    >>> # --> os.getcwd()/session1.<dataclass>.info

    Save `obj` without creating a spy-container using an absolute path

    >>> spy.save(obj, filename="/tmp/session1")
    >>> # --> /tmp/session1.<dataclass>
    >>> # --> /tmp/session1.<dataclass>.info

    Save `obj` in a new spy-container created in the current working directory

    >>> spy.save(obj, container="container.spy")
    >>> # --> os.getcwd()/container.spy/container.<dataclass>
    >>> # --> os.getcwd()/container.spy/container.<dataclass>.info

    Save `obj` in a new spy-container created by providing an absolute path

    >>> spy.save(obj, container="/tmp/container.spy")
    >>> # --> /tmp/container.spy/container.<dataclass>
    >>> # --> /tmp/container.spy/container.<dataclass>.info

    Save `obj` in a new (or existing) spy-container under a different tag

    >>> spy.save(obj, container="session1.spy", tag="someTag")
    >>> # --> os.getcwd()/session1.spy/session1_someTag.<dataclass>
    >>> # --> os.getcwd()/session1.spy/session1_someTag.<dataclass>.info

    See also
    --------
    syncopy.load : load data created with :func:`syncopy.save`
    """

    # Make sure `out` is a valid Syncopy data object
    data_parser(out, varname="out", writable=None, empty=False)

    if filename is None and container is None:
        raise SPYError('filename and container cannot both be `None`')

    if container is not None and filename is None:
        # construct filename from container name
        if not isinstance(container, str):
            raise SPYTypeError(container, varname="container", expected="str")
        if not os.path.splitext(container)[1] == ".spy":
            container += ".spy"
        fileInfo = filename_parser(container)
        filename = os.path.join(fileInfo["folder"],
                                fileInfo["container"],
                                fileInfo["basename"])
        # handle tag
        if tag is not None:
            if not isinstance(tag, str):
                raise SPYTypeError(tag, varname="tag", expected="str")
            filename += '_' + tag

    elif container is not None and filename is not None:
        raise SPYError("container and filename cannot be used at the same time")

    if not isinstance(filename, str):
        raise SPYTypeError(filename, varname="filename", expected="str")

    # add extension if not part of the filename
    if "." not in os.path.splitext(filename)[1]:
        filename += out._classname_to_extension()

    try:
        scalar_parser(memuse, varname="memuse", lims=[0, np.inf])
    except Exception as exc:
        raise exc

    if not isinstance(overwrite, bool):
        raise SPYTypeError(overwrite, varname="overwrite", expected="bool")

    # Parse filename for validity and construct full path to HDF5 file
    fileInfo = filename_parser(filename)
    if fileInfo["extension"] != out._classname_to_extension():
        raise SPYError("Extension in filename ({ext}) does not match "
                       "data class ({dclass})".format(
                           ext=fileInfo["extension"],
                           dclass=out.__class__.__name__))
    dataFile = os.path.join(fileInfo["folder"], fileInfo["filename"])

    # If `out` is to replace its own on-disk representation, be more careful
    if overwrite and dataFile == out.filename:
        replace = True
    else:
        replace = False

    # Prevent `out` from trying to re-create its own data file
    if replace:
        out.data.flush()
        h5f = out.data.file
        dat = out.data
        trl = h5f["trialdefinition"]
    else:
        if not os.path.exists(fileInfo["folder"]):
            try:
                os.makedirs(fileInfo["folder"])
            except IOError:
                raise SPYIOError(fileInfo["folder"])
            except Exception as exc:
                raise exc
        else:
            if os.path.exists(dataFile):
                if not os.path.isfile(dataFile):
                    raise SPYIOError(dataFile)
                if overwrite:
                    try:
                        h5f = h5py.File(dataFile, mode="w")
                        h5f.close()
                    except Exception as exc:
                        msg = "Cannot overwrite {} - file may still be open. "
                        msg += "Original error message below\n{}"
                        raise SPYError(msg.format(dataFile, str(exc)))
                else:
                    raise SPYIOError(dataFile, exists=True)
        h5f = h5py.File(dataFile, mode="w")

        # Save each member of `_hdfFileDatasetProperties` in target HDF file
        for datasetName in out._hdfFileDatasetProperties:
            dataset = getattr(out, datasetName)

            # Member is a memory map
            if isinstance(dataset, np.memmap):
                # Given memory cap, compute how many data blocks can be grabbed
                # per swipe (divide by 2 since we're working with an add'l tmp array)
                memuse *= 1024**2 / 2
                nrow = int(memuse / (np.prod(dataset.shape[1:]) * dataset.dtype.itemsize))
                rem = int(dataset.shape[0] % nrow)
                n_blocks = [nrow] * int(dataset.shape[0] // nrow) + [rem] * int(rem > 0)

                # Write data block-wise to dataset (use `clear` to wipe blocks of
                # mem-maps from memory)
                dat = h5f.create_dataset(datasetName,
                                         dtype=dataset.dtype,
                                         shape=dataset.shape)
                for m, M in enumerate(n_blocks):
                    dat[m * nrow:m * nrow + M, :] = out.data[m * nrow:m * nrow + M, :]
                    out.clear()

            # Member is a HDF5 dataset
            else:
                dat = h5f.create_dataset(datasetName, data=dataset)

    # Now write trial-related information
    trl_arr = np.array(out.trialdefinition)
    if replace:
        trl[()] = trl_arr
        trl.flush()
    else:
        trl = h5f.create_dataset("trialdefinition", data=trl_arr,
                                 maxshape=(None, trl_arr.shape[1]))

    # Write to log already here so that the entry can be exported to json
    infoFile = dataFile + FILE_EXT["info"]
    out.log = "Wrote files " + dataFile + "\n\t\t\t" + 2 * " " + infoFile

    # While we're at it, write cfg entries
    out.cfg = {"method": sys._getframe().f_code.co_name,
               "files": [dataFile, infoFile]}

    # Assemble dict for JSON output: order things by their "readability"
    outDict = OrderedDict(startInfoDict)
    outDict["filename"] = fileInfo["filename"]
    outDict["dataclass"] = out.__class__.__name__
    outDict["data_dtype"] = dat.dtype.name
    outDict["data_shape"] = dat.shape
    outDict["data_offset"] = dat.id.get_offset()
    outDict["trl_dtype"] = trl.dtype.name
    outDict["trl_shape"] = trl.shape
    outDict["trl_offset"] = trl.id.get_offset()
    if isinstance(out.data, np.ndarray):
        if np.isfortran(out.data):
            outDict["order"] = "F"
        else:
            outDict["order"] = "C"

    for key in out._infoFileProperties:
        value = getattr(out, key)
        if isinstance(value, np.ndarray):
            value = value.tolist()
        # potentially nested dicts
        elif isinstance(value, dict):
            value = dict(value)
            _dict_converter(value)
        outDict[key] = value

    # Save relevant stuff as HDF5 attributes
    for key in out._hdfFileAttributeProperties:
        if outDict[key] is None:
            h5f.attrs[key] = "None"
        else:
            try:
                h5f.attrs[key] = outDict[key]
            except RuntimeError:
                msg = "Too many entries in `{}` - truncating HDF5 attribute. " +\
                      "Please refer to {} for complete listing."
                info_fle = os.path.split(
                    os.path.split(filename.format(ext=FILE_EXT["info"]))[0])[1]
                info_fle = os.path.join(
                    info_fle,
                    os.path.basename(filename.format(ext=FILE_EXT["info"])))
                SPYWarning(msg.format(key, info_fle))
                h5f.attrs[key] = [outDict[key][0], "...", outDict[key][-1]]

    # Re-assign filename after saving (and remove source in case it came from `__storage__`)
    if not replace:
        h5f.close()
        if __storage__ in out.filename:
            out.data.file.close()
            os.unlink(out.filename)
        out.data = dataFile

    # Compute checksum and finally write JSON (automatically overwrites existing)
    outDict["file_checksum"] = hash_file(dataFile)
    with open(infoFile, 'w') as out_json:
        json.dump(outDict, out_json, indent=4)

    return
def load(filename, tag=None, dataclass=None, checksum=False, mode="r+", out=None):
    r"""
    Load Syncopy data object(s) from disk

    Either loads single files within or outside of '.spy'-containers or loads
    multiple objects from a single '.spy'-container. Loading from containers
    can be further controlled by imposing restrictions on object class(es)
    (via `dataclass`) and file-name tag(s) (via `tag`).

    Parameters
    ----------
    filename : str
        Either path to Syncopy container folder (\*.spy; if the extension is
        omitted, '.spy' will be appended) or name of data or metadata file.
        If `filename` points to a container and no further specifications are
        provided, the entire contents of the container are loaded. Otherwise,
        specific objects may be selected using the `dataclass` or `tag`
        keywords (see below).
    tag : None or str or list
        If `filename` points to a container, `tag` may be used to filter
        objects by filename-`tag`. Multiple tags can be provided using a list,
        e.g., ``tag = ['experiment1', 'experiment2']``. Can be combined with
        `dataclass` (see below). Invalid if `filename` points to a single file.
    dataclass : None or str or list
        If provided, only objects of provided dataclass are loaded from disk.
        Available options are '.analog', '.spectral', '.spike' and '.event'
        (as listed in ``spy.FILE_EXT["data"]``). Multiple class specifications
        can be provided using a list, e.g., ``dataclass = ['.analog', '.spike']``.
        Can be combined with `tag` (see above) and is also valid if `filename`
        points to a single file (e.g., to ensure the loaded object is of a
        specific type).
    checksum : bool
        If `True`, checksum-matching is performed on loaded object(s) to ensure
        data-integrity (impairs performance particularly when loading large
        files).
    mode : str
        Data access mode of loaded objects (can be 'r' for read-only, 'r+' or
        'w' for read/write access).
    out : Syncopy data object
        Empty object to be filled with data loaded from disk. Has to match the
        type of the on-disk file (e.g., ``filename = 'mydata.analog'`` requires
        `out` to be a :class:`syncopy.AnalogData` object). Can only be used
        when loading single objects from disk (`out` is ignored when multiple
        files are loaded from a container).

    Returns
    -------
    Nothing : None
        If a single file is loaded and `out` was provided, `out` is filled with
        data loaded from disk, i.e., :func:`syncopy.load` does **not** create a
        new object
    obj : Syncopy data object
        If a single file is loaded and `out` was `None`, :func:`syncopy.load`
        returns a new object.
    objdict : dict
        If multiple files are loaded, :func:`syncopy.load` creates a new object
        for each file and places them in a dictionary whose keys are the
        base-names (sans path) of the corresponding files.

    Notes
    -----
    All of Syncopy's classes offer (limited) support for data loading upon
    object creation. Just as the class method ``.save`` can be used as a
    shortcut for :func:`syncopy.save`, Syncopy objects can be created directly
    from Syncopy data-files, e.g.,

    >>> adata = spy.AnalogData('/path/to/session1.analog')

    creates a new :class:`syncopy.AnalogData` object and immediately fills it
    with data loaded from the file "/path/to/session1.analog".

    Since only one object can be created at a time, this loading shortcut only
    supports single file specifications (i.e., ``spy.AnalogData("container.spy")``
    is invalid).

    Examples
    --------
    Load all objects found in the spy-container "sessionName" (the extension
    ".spy" may or may not be provided)

    >>> objectDict = spy.load("sessionName")
    >>> # --> returns a dict with base-filenames as keys

    Load all :class:`syncopy.AnalogData` and :class:`syncopy.SpectralData`
    objects from the spy-container "sessionName"

    >>> objectDict = spy.load("sessionName.spy", dataclass=['analog', 'spectral'])

    Load a specific :class:`syncopy.AnalogData` object from the above
    spy-container

    >>> obj = spy.load("sessionName.spy/sessionName_someTag.analog")

    This is equivalent to

    >>> obj = spy.AnalogData("sessionName.spy/sessionName_someTag.analog")

    If the "sessionName" spy-container only contains one object with the tag
    "someTag", the above call is equivalent to

    >>> obj = spy.load("sessionName.spy", tag="someTag")

    If there are multiple objects of different types using the same tag
    "someTag", the above call can be further narrowed down to only load the
    requested :class:`syncopy.AnalogData` object

    >>> obj = spy.load("sessionName.spy", tag="someTag", dataclass="analog")

    See also
    --------
    syncopy.save : save syncopy object on disk
    """

    # Ensure `filename` is either a valid .spy container or data file: if
    # `filename` is a directory w/o '.spy' extension, append it
    if not isinstance(filename, str):
        raise SPYTypeError(filename, varname="filename", expected="str")
    if len(os.path.splitext(os.path.abspath(os.path.expanduser(filename)))[1]) == 0:
        filename += FILE_EXT["dir"]
    try:
        fileInfo = filename_parser(filename)
    except Exception as exc:
        raise exc

    if tag is not None:
        if isinstance(tag, str):
            tags = [tag]
        else:
            tags = tag
        try:
            array_parser(tags, varname="tag", ntype=str)
        except Exception as exc:
            raise exc
        if fileInfo["filename"] is not None:
            raise SPYError("Only containers can be loaded with `tag` keyword!")
        for tk in range(len(tags)):
            tags[tk] = "*" + tags[tk] + "*"
    else:
        tags = "*"

    # If `dataclass` was provided, format it for our needs (e.g., 'spike' -> ['.spike'])
    if dataclass is not None:
        if isinstance(dataclass, str):
            dataclass = [dataclass]
        try:
            array_parser(dataclass, varname="dataclass", ntype=str)
        except Exception as exc:
            raise exc
        dataclass = ["." + dclass if not dclass.startswith(".") else dclass
                     for dclass in dataclass]
        extensions = set(dataclass).intersection(FILE_EXT["data"])
        if len(extensions) == 0:
            lgl = "extension(s) '" + "or '".join(ext + "' " for ext in FILE_EXT["data"])
            raise SPYValueError(legal=lgl, varname="dataclass",
                                actual=str(dataclass))

    # Avoid any misunderstandings here...
    if not isinstance(checksum, bool):
        raise SPYTypeError(checksum, varname="checksum", expected="bool")

    # Abuse `AnalogData.mode`-setter to vet `mode`
    try:
        spd.AnalogData().mode = mode
    except Exception as exc:
        raise exc

    # If `filename` points to a spy container, `glob` what's inside, otherwise just load
    if fileInfo["filename"] is None:

        if dataclass is None:
            extensions = FILE_EXT["data"]
        container = os.path.join(fileInfo["folder"], fileInfo["container"])
        fileList = []
        for ext in extensions:
            for tag in tags:
                fileList.extend(glob(os.path.join(container, tag + ext)))

        if len(fileList) == 0:
            fsloc = os.path.join(container, "" +
                                 "or ".join(tag + " " for tag in tags) +
                                 "with extensions " +
                                 "or ".join(ext + " " for ext in extensions))
            raise SPYIOError(fsloc, exists=False)

        if len(fileList) == 1:
            return _load(fileList[0], checksum, mode, out)

        if out is not None:
            msg = "When loading multiple objects, the `out` keyword is ignored"
            SPYWarning(msg)

        objectDict = {}
        for fname in fileList:
            obj = _load(fname, checksum, mode, None)
            objectDict[os.path.basename(obj.filename)] = obj
        return objectDict

    else:
        if dataclass is not None:
            if os.path.splitext(fileInfo["filename"])[1] not in dataclass:
                lgl = "extension '" + \
                      "or '".join(dclass + "' " for dclass in dataclass)
                raise SPYValueError(legal=lgl, varname="filename",
                                    actual=fileInfo["filename"])
        return _load(filename, checksum, mode, out)
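# Illustrative usage sketch (an addition): container loading with the `tag`
# and `dataclass` filters documented above. The container "sessionName" and
# tag "someTag" are the hypothetical names used in the docstring examples,
# and `_example_load_container` itself is a placeholder, not public API.
def _example_load_container():
    import syncopy as spy
    # entire container --> dict keyed by base-filenames
    objectDict = spy.load("sessionName")
    # narrow down to a single AnalogData object tagged "someTag"
    obj = spy.load("sessionName.spy", tag="someTag", dataclass="analog")
    return objectDict, obj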
def _load(filename, checksum, mode, out):
    """ Local helper """

    fileInfo = filename_parser(filename)
    hdfFile = os.path.join(fileInfo["folder"], fileInfo["filename"])
    jsonFile = hdfFile + FILE_EXT["info"]

    try:
        _ = io_parser(hdfFile, varname="hdfFile", isfile=True, exists=True)
        _ = io_parser(jsonFile, varname="jsonFile", isfile=True, exists=True)
    except Exception as exc:
        raise exc

    with open(jsonFile, "r") as file:
        jsonDict = json.load(file)

    if "dataclass" not in jsonDict.keys():
        raise SPYError(
            "Info file {} does not contain a dataclass field".format(jsonFile))

    if hasattr(spd, jsonDict["dataclass"]):
        dataclass = getattr(spd, jsonDict["dataclass"])
    else:
        raise SPYError(
            "Unknown data class {dclass}".format(dclass=jsonDict["dataclass"]))

    requiredFields = tuple(startInfoDict.keys()) + dataclass._infoFileProperties
    for key in requiredFields:
        if key not in jsonDict.keys():
            raise SPYError(
                "Required field {field} for {cls} not in {file}".format(
                    field=key, cls=dataclass.__name__, file=jsonFile))

    # If `_hdr` is an empty list, set it to `None` to not confuse meta-functions
    hdr = jsonDict.get("_hdr")
    if isinstance(hdr, (list, np.ndarray)):
        if len(hdr) == 0:
            jsonDict["_hdr"] = None

    # FIXME: add version comparison (syncopy.__version__ vs jsonDict["_version"])

    # If wanted, perform checksum matching
    if checksum:
        hsh_msg = "hash = {hsh:s}"
        hsh = hash_file(hdfFile)
        if hsh != jsonDict["file_checksum"]:
            raise SPYValueError(
                legal=hsh_msg.format(hsh=jsonDict["file_checksum"]),
                varname=os.path.basename(hdfFile),
                actual=hsh_msg.format(hsh=hsh))

    # Parsing is done, create new or check provided object
    if out is not None:
        try:
            data_parser(out, varname="out", writable=True,
                        dataclass=jsonDict["dataclass"])
        except Exception as exc:
            raise exc
        new_out = False
    else:
        out = dataclass()
        new_out = True

    # First and foremost, assign dimensional information
    dimord = jsonDict.pop("dimord")
    out.dimord = dimord

    # Access data on disk (error checking is done by setters)
    out.mode = mode
    for datasetProperty in out._hdfFileDatasetProperties:
        setattr(out, datasetProperty,
                h5py.File(hdfFile, mode="r")[datasetProperty])

    # Abuse ``definetrial`` to set trial-related props
    trialdef = h5py.File(hdfFile, mode="r")["trialdefinition"][()]
    out.definetrial(trialdef)

    # Assign metadata
    for key in [prop for prop in dataclass._infoFileProperties
                if prop != "dimord"]:
        setattr(out, key, jsonDict[key])

    # Write `cfg` entries
    thisMethod = sys._getframe().f_code.co_name.replace("_", "")
    out.cfg = {"method": thisMethod, "files": [hdfFile, jsonFile]}

    # Write log-entry
    msg = "Read files v. {ver:s} ".format(ver=jsonDict["_version"])
    msg += "{hdf:s}\n\t" + (len(msg) + len(thisMethod) + 2) * " " + "{json:s}"
    out.log = msg.format(hdf=hdfFile, json=jsonFile)

    # Happy breakdown
    return out if new_out else None
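# Illustrative sketch (an addition): `_load` either fills a caller-supplied
# `out` object and returns `None`, or returns a freshly created object; the
# public `load` exposes the same behavior via its `out` keyword. The file
# name below is the hypothetical one from the `load` docstring examples,
# and `_example_load_inplace` is a placeholder, not public API.
def _example_load_inplace():
    import syncopy as spy
    out = spy.AnalogData()          # empty shell to be filled in place
    spy.load("sessionName.spy/sessionName_someTag.analog", out=out)
    return out                      # `out` now holds the on-disk data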