def saveload_warehouse():
    """Store all warehouse datasets into HDF5 and reload them."""
    import os
    import shutil
    import tempfile

    import h5py
    from mvpa.base.hdf5 import obj2hdf, hdf2obj

    tempdir = tempfile.mkdtemp()

    # store the whole datasets warehouse in one hdf5 file
    hdf = h5py.File(os.path.join(tempdir, 'myhdf5.hdf5'), 'w')
    for d in datasets:
        obj2hdf(hdf, datasets[d], d)
    hdf.close()

    # reload every dataset from the same file
    hdf = h5py.File(os.path.join(tempdir, 'myhdf5.hdf5'), 'r')
    rc_ds = {}
    for d in hdf:
        rc_ds[d] = hdf2obj(hdf[d])
    hdf.close()

    # cleanup temp dir
    shutil.rmtree(tempdir, ignore_errors=True)
    # return the reconstructed datasets (for use in datasets warehouse)
    return rc_ds
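
# A minimal single-dataset round-trip sketch using the same obj2hdf/hdf2obj
# helpers as saveload_warehouse() above. The function name, `ds` argument,
# group name 'mydataset', and the file name are hypothetical illustrations,
# not part of the library API; assumes h5py is installed and `ds` is an
# AttrDataset (e.g. one entry of the warehouse `datasets` dict).
def _example_single_roundtrip(ds, filename='example.hdf5'):
    import h5py
    from mvpa.base.hdf5 import obj2hdf, hdf2obj

    # store the dataset under the group name 'mydataset'
    hdf = h5py.File(filename, 'w')
    obj2hdf(hdf, ds, 'mydataset')
    hdf.close()

    # reconstruct the object from that group
    hdf = h5py.File(filename, 'r')
    rec = hdf2obj(hdf['mydataset'])
    hdf.close()
    return rec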
def from_hdf5(cls, source, name=None):
    """Load a Dataset from an HDF5 file.

    Parameters
    ----------
    source : string or h5py.highlevel.File
      Filename or h5py File instance to load the dataset from.
    name : string, optional
      If the file contains multiple entries at the first level,
      `name` specifies the group to be loaded as the AttrDataset.

    Returns
    -------
    AttrDataset

    Raises
    ------
    ValueError
    """
    if not externals.exists('h5py'):
        raise RuntimeError(
            "Missing 'h5py' package -- loading is not possible.")
    import h5py
    from mvpa.base.hdf5 import hdf2obj

    # check if we got an hdf file instance already
    if isinstance(source, h5py.highlevel.File):
        own_file = False
        hdf = source
    else:
        own_file = True
        hdf = h5py.File(source, 'r')

    try:
        if name is not None:
            # some HDF5 subset is requested
            if name not in hdf:
                raise ValueError("Cannot find '%s' group in HDF file %s. "
                                 "File contains groups: %s"
                                 % (name, source, hdf.keys()))
            # access the group that should contain the dataset
            dsgrp = hdf[name]
            res = hdf2obj(dsgrp)
            if not isinstance(res, AttrDataset):
                # TODO: unittest before committing
                raise ValueError("%r in %s contains %s, not a dataset. "
                                 "File contains groups: %s."
                                 % (name, source, type(res), hdf.keys()))
        else:
            # just consider the whole file
            res = hdf2obj(hdf)
            if not isinstance(res, AttrDataset):
                # TODO: unittest before committing
                raise ValueError("Failed to load a dataset from %s. "
                                 "Loaded %s instead."
                                 % (source, type(res)))
    finally:
        # close the file only if we opened it ourselves
        if own_file:
            hdf.close()
    return res
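
# Usage sketch for from_hdf5 -- a minimal illustration, not part of the
# class. File and group names here are hypothetical; they assume a file
# written with obj2hdf (e.g. by saveload_warehouse above).
#
#   # load the single dataset stored at the first level of the file
#   ds = AttrDataset.from_hdf5('myhdf5.hdf5')
#
#   # or pick a specific group out of a multi-dataset file
#   ds = AttrDataset.from_hdf5('myhdf5.hdf5', name='mydataset')
#
#   # an already open h5py.File instance works too and is left open
#   import h5py
#   hdf = h5py.File('myhdf5.hdf5', 'r')
#   ds = AttrDataset.from_hdf5(hdf, name='mydataset')
#   hdf.close()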