def bundle(self, key, path, new_key):
    """Copy the data stored under *key* into another HDF5 file under *new_key*.

    Parameters
    ----------
    key : str
        Key of the existing dataset in this handler's HDF5 file.
    path : str
        Path of the target HDF5 file (created if it does not exist).
    new_key : str
        Key under which the copied data is stored in the target file.
    """
    data = self.get(key)
    # Open in append mode explicitly: h5py >= 3 defaults to read-only
    # ('r') when no mode is given, which breaks create_dataset below and
    # fails outright when the target file does not exist yet.
    with h5py.File(path, 'a') as target_h5f:
        writer = target_h5f
        if ss.isspmatrix(data) or isinstance(data, h5sparse.Dataset):
            # Wrap the file so sparse data is written in h5sparse's
            # group-based on-disk format.
            writer = h5sparse.Group(target_h5f)
        writer.create_dataset(new_key, data=data)
def get_function_kwargs(self, will_generate_keys, data, manually_create_dataset=False):
    """Build the keyword arguments passed to a data-generation function.

    Parameters
    ----------
    will_generate_keys : iterable of str
        Keys the generation function is expected to create.
    data : dict
        Source data passed through to the function; included only when
        non-empty.
    manually_create_dataset : bool or str
        ``False``: the handler writes results itself, no extra kwargs.
        ``True``: pass plain h5py ``create_dataset`` partials per key.
        A value in ``SPARSE_FORMAT_SET``: pass h5sparse
        ``create_dataset`` partials per key instead.

    Returns
    -------
    dict
        Kwargs containing at most ``'data'`` and
        ``'create_dataset_functions'``.
    """
    kwargs = {}
    if data:  # idiomatic emptiness test (was ``len(data) > 0``)
        kwargs['data'] = data
    # ``is True`` is deliberate: truthy non-bool values (sparse format
    # names) must fall through to the SPARSE_FORMAT_SET branch below.
    if manually_create_dataset is True:
        kwargs['create_dataset_functions'] = {
            k: partial(self.h5f.create_dataset, k)
            for k in will_generate_keys
        }
    elif manually_create_dataset in SPARSE_FORMAT_SET:
        kwargs['create_dataset_functions'] = {
            k: partial(h5sparse.Group(self.h5f).create_dataset, k)
            for k in will_generate_keys
        }
    return kwargs
def write_data(self, result_dict):
    """Validate generated results and write them into the HDF5 file.

    Parameters
    ----------
    result_dict : dict
        Maps a DataDefinition to its generated result (dense array or
        scipy sparse matrix).

    Raises
    ------
    ValueError
        If any result contains NaN.
    NotImplementedError
        If a key already exists in the file (overwriting unsupported).
    """
    for data_definition, result in six.viewitems(result_dict):
        # Reject NaNs; sparse matrices expose their values via ``.data``.
        if ss.isspmatrix(result):
            if np.isnan(result.data).any():
                raise ValueError("data {} have nan".format(data_definition))
        elif np.isnan(result).any():
            raise ValueError("data {} have nan".format(data_definition))
        # Hoist the serialized key: ``json()`` was called once for the
        # membership test and again for create_dataset in the original.
        key = data_definition.json()
        with SimpleTimer("Writing generated data {} to hdf5 file"
                         .format(data_definition),
                         end_in_new_line=False):
            if key in self.h5f:
                # self.h5f[key][...] = result
                raise NotImplementedError("Overwriting not supported. Please report an issue.")
            else:
                # h5sparse.Group handles both sparse and dense data here.
                h5sparse.Group(self.h5f).create_dataset(key, data=result)
    self.h5f.flush()
def write_data(self, result_dict):
    """Write each generated result to the HDF5 file, rejecting NaNs.

    Sparse results (csc/csr) go through h5sparse; everything else is
    written with plain h5py. Existing keys cannot be overwritten.
    """
    sparse_types = (ss.csc_matrix, ss.csr_matrix)
    for key, result in six.iteritems(result_dict):
        # NaN check: sparse matrices keep their values in ``.data``.
        has_nan = (np.isnan(result.data).any()
                   if ss.isspmatrix(result)
                   else np.isnan(result).any())
        if has_nan:
            raise ValueError("data {} have nan".format(key))
        with SimpleTimer(
                "Writing generated data {} to hdf5 file".format(key),
                end_in_new_line=False):
            if key in self.h5f:
                # self.h5f[key][...] = result
                raise NotImplementedError("Overwriting not supported.")
            if isinstance(result, sparse_types):
                # sparse matrix
                h5sparse.Group(self.h5f).create_dataset(key, data=result)
            else:
                self.h5f.create_dataset(key, data=result)
    self.h5f.flush()
def get(self, key):
    """Return the dataset for *key*, or a dict of datasets for many keys.

    Parameters
    ----------
    key : str or iterable of str
        A single dataset key, or an iterable of keys.

    Returns
    -------
    h5sparse dataset, or dict mapping each key to its dataset.
    """
    # ``six.string_types`` covers py2 ``basestring`` and py3 ``str``;
    # the bare ``basestring`` used before is a NameError on Python 3,
    # and the rest of this file already standardizes on six.
    if isinstance(key, six.string_types):
        return h5sparse.Group(self.h5f)[key]
    return {k: h5sparse.Group(self.h5f)[k] for k in key}
# read data h5f = h5sparse.File("C:\\Users\\22560\\Desktop\\dis.h5") h5f['sparse/matrix'][1:3] h5f['sparse']['matrix'][1:3].toarray() import h5py # allow us to use h5py to get data h5f = h5py.File("test.h5") h5sparse.Group(h5f)['sparse/matrix'] h5sparse.Dataset(h5f['sparse/matrix']) # test append method in h5sparse import h5sparse import h5py import scipy.sparse as ss import h5sparse
def get(self, data_definition):
    """Look up stored data by its definition.

    A single ``DataDefinition`` returns its dataset directly; any other
    iterable of definitions returns a dict mapping each definition to
    its dataset.
    """
    if not isinstance(data_definition, DataDefinition):
        return {definition: h5sparse.Group(self.h5f)[definition.json()]
                for definition in data_definition}
    return h5sparse.Group(self.h5f)[data_definition.json()]