def test_write_block(self):
    """ Writing into the user block must leave the HDF5 content intact """
    path = self.mktemp()
    payload = b'X' * 512

    # Create a file that reserves a 512-byte user block and holds one group.
    hfile = File(path, 'w', userblock_size=512)
    hfile.create_group("Foobar")
    hfile.close()

    # Overwrite the first 512 bytes through an ordinary binary file handle.
    with open(path, 'r+b') as raw:
        raw.write(payload)

    # The group created earlier must still be visible.
    with h5py.File(path, 'r') as hfile:
        assert "Foobar" in hfile

    # The bytes written above must read back unchanged.
    with open(path, 'rb') as raw:
        self.assertEqual(raw.read(512), payload)
def test_issue_212(self):
    """ Issue 212

    Fails with:

    AttributeError: 'SharedConfig' object has no attribute 'lapl'
    """
    def make_closer(handle):
        """ Build a cleanup callback that closes *handle* if it is still open """
        def _close():
            try:
                if handle:
                    handle.close()
            except IOError:
                pass
        return _close

    target_path = self.mktemp()
    link_path = self.mktemp()

    # File that the external link will point at.
    target = File(target_path, 'w')
    self.addCleanup(make_closer(target))
    target.create_group('a')
    target.close()

    # File holding the external link itself.
    linker = File(link_path, 'w')
    self.addCleanup(make_closer(linker))
    linker['link'] = ExternalLink(target_path, '/')  # note root group
    linker.close()

    # Resolve the link read-only and reach a group inside the target file.
    reader = File(link_path, 'r')
    self.addCleanup(make_closer(reader))
    linked_root = reader['link']
    self.assertIsInstance(linked_root['a'], Group)
def test_close_multiple_mpio_driver(self):
    """ A file opened with the MPIO driver can be closed twice in a row """
    from mpi4py import MPI

    path = self.mktemp()
    hfile = File(path, 'w', driver='mpio', comm=MPI.COMM_WORLD)
    hfile.create_group("test")
    # Deliberately close the same handle twice.
    for _ in range(2):
        hfile.close()
def test_backing(self):
    """ With backing_store=True the core driver persists data to disk """
    path = self.mktemp()

    writer = File(path, 'w', driver='core', backing_store=True)
    writer.create_group('foo')
    writer.close()

    # Re-open with the default driver: the group must be there.
    reader = File(path, 'r')
    assert 'foo' in reader
    reader.close()
def test_create(self):
    """ Opening with mode 'w' truncates any existing content """
    path = self.mktemp()

    first = File(path, 'w')
    self.assertTrue(first)
    first.create_group('foo')
    first.close()

    # A second 'w' open of the same path must start from an empty file.
    second = File(path, 'w')
    self.assertNotIn('foo', second)
    second.close()
def test_readwrite(self):
    """ Mode 'r+' exposes existing content and accepts new objects """
    path = self.mktemp()

    seed = File(path, 'w')
    seed.create_group('foo')
    seed.close()

    hfile = File(path, 'r+')
    assert 'foo' in hfile
    hfile.create_group('bar')
    assert 'bar' in hfile
    hfile.close()
def test_readonly(self):
    """ Mode 'r' yields a valid handle that refuses group creation """
    path = self.mktemp()

    created = File(path, 'w')
    created.close()
    # A closed handle is falsy.
    self.assertFalse(created)

    readonly = File(path, 'r')
    self.assertTrue(readonly)
    with self.assertRaises(ValueError):
        readonly.create_group('foo')
    readonly.close()
def test_readonly(self):
    """ The core driver opens an existing file read-only """
    path = self.mktemp()

    seed = File(path, 'w')
    seed.create_group('foo')
    seed.close()

    readonly = File(path, 'r', driver='core')
    self.assertTrue(readonly)
    assert 'foo' in readonly
    # Creating a group on the read-only handle must raise ValueError.
    with self.assertRaises(ValueError):
        readonly.create_group('bar')
    readonly.close()
class BaseMapping(BaseGroup):

    """
        Base class for mapping tests
    """

    def setUp(self):
        """ Create a file with four groups plus a soft link named 'x' """
        self.f = File(self.mktemp(), 'w')
        self.groups = ('a', 'b', 'c', 'd')
        for name in self.groups:
            self.f.create_group(name)
        # 'x' is a soft link to '/mongoose', which is never created here.
        self.f['x'] = h5py.SoftLink('/mongoose')
        self.groups += ('x',)

    def tearDown(self):
        """ Close the file unless a test already closed it """
        if not self.f:
            return
        self.f.close()
def save_model_state_dict(
    hf: h5py.File,
    state_dict: Dict[str, ModelParameter],
) -> None:
    """Write every parameter of *state_dict* as a dataset in the state-dict group.

    A new group named ``MODEL_STATE_DICT_GROUP`` is created in *hf* with
    ``track_order=True``. Each entry becomes one dataset keyed by its public
    name; the parameter's private name is recorded in the dataset attribute
    ``STATE_DICT_KEY_ATTR``.
    """
    group = hf.create_group(MODEL_STATE_DICT_GROUP, track_order=True)
    for public_name, parameter in state_dict.items():
        values = parameter.tensor.numpy()
        stored = group.create_dataset(public_name, data=values)
        stored.attrs[STATE_DICT_KEY_ATTR] = parameter.private_name
def test_readonly_delete_exception(self):
    """ Deleting an object from a read-only file raises KeyError """
    # Note: it is impossible to restore the old behavior (ValueError)
    # without breaking the above test (non-existing objects)
    path = self.mktemp()

    with File(path, 'w') as writable:
        writable.create_group('foo')

    with File(path, 'r') as readonly:
        with self.assertRaises(KeyError):
            del readonly['foo']
def test_append(self):
    """ Mode 'a' creates a missing file and re-opens it read/write """
    path = self.mktemp()

    # First open: the file does not exist yet and must be created.
    with File(path, 'a') as hfile:
        self.assertTrue(hfile)
        hfile.create_group('foo')
        assert 'foo' in hfile

    # Second open: earlier content is kept and new content can be added.
    with File(path, 'a') as hfile:
        assert 'foo' in hfile
        hfile.create_group('bar')
        assert 'bar' in hfile
def test_mode(self):
    """ A group's .file reports the same mode as the file it came from """
    with File(self.mktemp(), 'w') as owner:
        child = owner.create_group('foo')
        self.assertEqual(child.file.mode, owner.mode)
def test_mode(self):
    """ The File retrieved through a group carries the owner's mode """
    with File(self.mktemp(), 'w') as owner:
        retrieved = owner.create_group('foo').file
        self.assertEqual(retrieved.mode, owner.mode)
def test_readonly_delete_exception(self):
    """ `del` on a member of a read-only file raises KeyError """
    # Note: it is impossible to restore the old behavior (ValueError)
    # without breaking the above test (non-existing objects)
    path = self.mktemp()

    # Populate the file, then re-open it read-only for the actual check.
    with File(path, 'w') as seed:
        seed.create_group('foo')

    with File(path, 'r') as locked:
        with self.assertRaises(KeyError):
            del locked['foo']
def create(cls, file: h5py.File, name: str) -> None:
    """Create group *name* in *file* and add its mandatory members.

    One dataset is requested per entry of ``cls._MANDATORY_DATASETS`` and one
    attribute, set to ``None``, per entry of ``cls._MANDATORY_ATTRS``.
    """
    new_group = file.create_group(name)

    # NOTE(review): create_dataset is called with a name only (no shape,
    # dtype or data) - confirm the installed h5py accepts that.
    for required_dataset in cls._MANDATORY_DATASETS:
        new_group.create_dataset(required_dataset)

    # NOTE(review): attributes are initialised to None - confirm h5py can
    # store that value.
    attributes = new_group.attrs
    for required_attr in cls._MANDATORY_ATTRS:
        attributes[required_attr] = None
def fold2foldfile(df: pd.DataFrame, out_file: h5py.File, fold_idx: int,
                  cont_feats: List[str], cat_feats: List[str], targ_feats: Union[str, List[str]], targ_type: Any,
                  misc_feats: Optional[List[str]] = None, wgt_feat: Optional[str] = None,
                  matrix_lookup: Optional[List[str]] = None, matrix_missing: Optional[np.ndarray] = None,
                  matrix_shape: Optional[Tuple[int, int]] = None) -> None:
    r'''
    Save fold of data into an h5py Group

    Arguments:
        df: Dataframe from which to save data
        out_file: h5py file to save data in
        fold_idx: ID for the fold; used to name the h5py group according to 'fold_{fold_idx}'
        cont_feats: list of columns in df to save as continuous variables
        cat_feats: list of columns in df to save as discrete variables
        targ_feats: (list of) column(s) in df to save as target feature(s)
        targ_type: type of target feature, e.g. int,'float32'
        misc_feats: any extra columns to save
        wgt_feat: column to save as data weights
        matrix_lookup: columns of df to save as matrix inputs, in the order in which they fill the flattened matrix
        matrix_missing: index (e.g. Boolean mask) over `matrix_lookup` selecting the columns to overwrite with NaN;
            if None, nothing is overwritten
        matrix_shape: per-row shape the selected columns are reshaped to; required when `matrix_lookup` is given
    '''

    # TODO infer target type automatically

    grp = out_file.create_group(f'fold_{fold_idx}')

    # Continuous and categorical inputs are stored side by side as float32.
    save_to_grp(np.hstack((df[cont_feats].values.astype('float32'),
                           df[cat_feats].values.astype('float32'))),
                grp, 'inputs')

    # `targ_feats` may be one column name or a list of them. A list cannot be
    # tested with `in df.columns` (it is unhashable), so check each name.
    targ_cols = [targ_feats] if isinstance(targ_feats, str) else list(targ_feats)
    if all(t in df.columns for t in targ_cols):
        save_to_grp(df[targ_feats].values.astype(targ_type), grp, 'targets')
    else:
        print(f'{targ_feats} not found in file')

    if wgt_feat is not None:
        if wgt_feat in df.columns:
            save_to_grp(df[wgt_feat].values.astype('float32'), grp, 'weights')
        else:
            print(f'{wgt_feat} not found in file')

    if misc_feats is not None:
        for f in misc_feats:
            if f in df.columns:
                save_to_grp(df[f].values, grp, f)
            else:
                print(f'{f} not found in file')

    if matrix_lookup is not None:
        mat = df[matrix_lookup].values
        # Indexing with None would act as np.newaxis and blank every entry,
        # so only apply the mask when one was supplied.
        if matrix_missing is not None:
            mat[:, matrix_missing] = np.nan  # np.NaN was removed in NumPy 2.0
        mat = mat.reshape((len(df), *matrix_shape))
        save_to_grp(mat, grp, 'matrix_inputs')
def test_swmr_mode_consistency(self):
    """ swmr_mode is the same on a file and on its members' .file """
    path = self.mktemp()
    hfile = File(path, 'w', libver='latest')
    member = hfile.create_group('foo')

    # Before switching: both views report SWMR as off.
    assert hfile.swmr_mode == member.file.swmr_mode == False

    hfile.swmr_mode = True
    # This setter should affect both fid and group member file attribute
    assert hfile.swmr_mode == member.file.swmr_mode == True

    hfile.close()
def weightsToHDF(w, name):
    """Write a sequence of weight arrays to ``<name>.h5``.

    All entries but the last are stored in group "Weights" as "Hidden 1",
    "Hidden 2", ...; the last entry is stored as "Output".

    Parameters
    ----------
    w : sequence
        Weight arrays; must contain at least one entry (the output layer).
    name : str
        Output path without the ``.h5`` extension.
    """
    # The context manager closes the file even if a dataset write fails.
    with File(name + ".h5", "w") as f:
        weights = f.create_group("Weights")
        for layer_number, layer in enumerate(w[:-1], start=1):
            weights.create_dataset("Hidden " + str(layer_number), data=layer)
        weights.create_dataset("Output", data=w[-1])
def _grp_from_path(fd: h5py.File, path: pathlib.Path):
    """Create and return the group ``<kind>/<epoch>`` derived from *path*'s file name.

    The second-to-last dot-separated piece of the file name is split on '-';
    its last token is parsed as the integer epoch and the token before it
    must be 'train' or 'valid'.
    """
    file_name = path.parts[-1]
    tokens = file_name.split('.')[-2].split('-')
    epoch = int(tokens[-1])
    kind = tokens[-2]
    assert kind in ('train', 'valid'), kind
    return fd.create_group('{}/{}'.format(kind, epoch))
def test_close(self):
    """ Closing one retrieved File handle closes every handle to that file """
    path = self.mktemp()
    primary = File(path, 'w')
    child = primary.create_group('foo')
    via_group = child.file
    via_root = primary['/'].file

    # Close through a derived handle only.
    via_group.close()

    for handle in (primary, via_group, via_root):
        self.assertFalse(handle)
def _save_as_hdf5_rec(cls, obj: Mapping[str, Union[Mapping, np.ndarray]], root: h5py.File):
    """Recursively mirror the nested mapping *obj* under *root*.

    Arrays and plain numbers become datasets named after their key, nested
    dicts become groups that are filled recursively, and any other value
    type raises ``ValueError``.
    """
    for key, value in obj.items():
        if isinstance(value, dict):
            # Descend: the new group is the root for the nested mapping.
            child = root.create_group(name=key)
            cls._save_as_hdf5_rec(value, child)
        elif isinstance(value, (np.ndarray, Number)):
            root.create_dataset(name=key, data=value)
        else:
            raise ValueError(f'Does not support type {type(value)}')
def test_file_mode_generalizes(self):
    """ File.mode stays 'r+' for file and member, before and after SWMR """
    path = self.mktemp()
    hfile = File(path, 'w', libver='latest')
    member = hfile.create_group('foo')

    # fid and group member file attribute should have the same mode
    assert hfile.mode == member.file.mode == 'r+'

    hfile.swmr_mode = True
    # fid and group member file attribute should still be 'r+'
    # even though file intent has changed
    assert hfile.mode == member.file.mode == 'r+'

    hfile.close()
def _write_header(self, file_handle: h5py.File):
    """Refresh the header and write each entry into a new 'header' group.

    ``bytes`` values go into a scalar fixed-length string dataset of
    ``FILEIDENT_LEN`` bytes; every other value is handed to
    ``create_dataset`` directly.
    """
    self._update_header()
    header_group = file_handle.create_group('header')
    for key, val in self._header.items():
        if not isinstance(val, bytes):
            header_group.create_dataset(name=key, data=val)
            continue
        # Scalar (shape ()) dataset with an explicit fixed-width string dtype.
        scalar = header_group.create_dataset(key, (), dtype=f'S{FILEIDENT_LEN}')
        scalar[()] = val
def add_meta_data(out_file: h5py.File, feats: List[str], cont_feats: List[str], cat_feats: List[str],
                  cat_maps: Optional[Dict[str, Dict[int, Any]]], targ_feats: Union[str, List[str]],
                  wgt_feat: Optional[str] = None, matrix_vecs: Optional[List[str]] = None,
                  matrix_feats_per_vec: Optional[List[str]] = None, matrix_row_wise: Optional[bool] = None) -> None:
    r'''
    Adds a 'meta_data' group to the foldfile describing the data: feature names, matrix information, etc.
    Every entry is stored as a JSON string.
    :class:`~lumin.nn.data.fold_yielder.FoldYielder` objects will access this and automatically extract it to save the
    user from having to manually pass lists of features.

    Arguments:
        out_file: h5py file to save data in
        feats: list of all features in data
        cont_feats: list of continuous features
        cat_feats: list of categorical features
        cat_maps: Dictionary mapping categorical features to dictionary mapping codes to categories
        targ_feats: (list of) target feature(s)
        wgt_feat: name of weight feature
        matrix_vecs: list of objects for matrix encoding, i.e. feature prefixes
        matrix_feats_per_vec: list of features per vector for matrix encoding, i.e. feature suffixes.
            Features listed but not present in df will be replaced with NaN.
        matrix_row_wise: whether objects encoded as a matrix should be encoded row-wise (i.e. all the features
            associated with an object are in their own row), or column-wise (i.e. all the features associated with an
            object are in their own column)
    '''

    meta = out_file.create_group('meta_data')

    def store(key: str, payload: Any) -> None:
        # Each piece of metadata is serialised to JSON before being written.
        meta.create_dataset(key, data=json.dumps(payload))

    store('cont_feats', cont_feats)
    store('cat_feats', cat_feats)
    store('targ_feats', targ_feats)
    if wgt_feat is not None:
        store('wgt_feat', wgt_feat)
    if cat_maps is not None:
        store('cat_maps', cat_maps)
    if matrix_vecs is None:
        return

    lookup, missing, shape = _build_matrix_lookups(feats, matrix_vecs, matrix_feats_per_vec, matrix_row_wise)
    present_mask = np.logical_not(missing)
    use = list(np.array(lookup)[present_mask])  # Only features present in data
    store('matrix_feats', {'present_feats': use,
                           'vecs': matrix_vecs,
                           'missing': [int(m) for m in missing],
                           'feats_per_vec': matrix_feats_per_vec,
                           'row_wise': matrix_row_wise,
                           'shape': shape})
def write_esh5_orbitals(cell, name, kpts=numpy.zeros((1, 3), dtype=numpy.float64)):
    """Write the periodic AO basis, transformed onto the FFT grid, to an hdf5 file.

    Parameters
    ----------
    cell : PySCF cell object
        Supplies the AO count (``nao_nr``), the FFT mesh (``mesh``), the
        uniform real-space grid (``gen_uniform_grids``) and the reciprocal
        vectors (``reciprocal_vectors``).
    name : string
        Name of the hdf5 file; it is opened in 'w' mode, so an existing file
        is overwritten.
    kpts : array. Default: numpy.zeros((1, 3))
        K-point array of dimension (nkpts, 3). The default array is shared
        between calls but is only read, never modified.

    Notes
    -----
    Everything is written to group "OrbsG". Each orbital is stored as
    ``kp<ik>_b<i>``, with the complex values viewed as float64 pairs in a
    trailing axis of length 2.
    """
    def to_qmcpack_complex(array):
        # View complex128 as float64 and expose (real, imag) as a last axis.
        shape = array.shape
        return array.view(numpy.float64).reshape(shape + (2, ))

    nao = cell.nao_nr()
    # The context manager closes the file even if the AO evaluation or FFT
    # raises part-way through.
    with File(name, 'w') as fh5:
        coords = cell.gen_uniform_grids(cell.mesh)
        kpts = numpy.asarray(kpts)
        nkpts = len(kpts)
        # Every k-point carries the full set of AOs.
        norbs = numpy.full((nkpts, ), nao, dtype=int)

        grp = fh5.create_group("OrbsG")
        grp.create_dataset("reciprocal_vectors", data=cell.reciprocal_vectors())
        grp.create_dataset("number_of_kpoints", data=nkpts)
        grp.create_dataset("kpoints", data=kpts)
        grp.create_dataset("number_of_orbitals", data=norbs)
        grp.create_dataset("fft_grid", data=cell.mesh)
        grp.create_dataset("grid_type", data=0)

        nnr = cell.mesh[0] * cell.mesh[1] * cell.mesh[2]
        for ik, k in enumerate(kpts):
            ao = numint.KNumInt().eval_ao(cell, coords, k)[0]
            # Phase factor exp(-i k.r), applied to each AO before the FFT.
            fac = numpy.exp(-1j * numpy.dot(coords, k))
            for i in range(norbs[ik]):
                aoi = fac * numpy.asarray(ao[:, i].T, order='C')
                aoi_G = tools.fft(aoi, cell.mesh)
                # Reverse the axis order of the mesh, then flatten.
                aoi_G = aoi_G.reshape(cell.mesh).transpose(2, 1, 0).reshape(nnr)
                grp.create_dataset('kp' + str(ik) + '_b' + str(i),
                                   data=to_qmcpack_complex(aoi_G))
def store_h5py(self, x_train, y_train, x_val, y_val, x_test, y_test):
    """Store the train/validation/test splits in the internal HDF5 file.

    The file named by ``self.config.data.HDFS_INTERNAL_DATA_FILENAME`` is
    opened in 'w' mode. Below a top-level "dataset" group, the groups
    "train", "val" and "test" each receive an ``x_<split>set`` and a
    ``y_<split>set`` dataset, written with gzip compression.

    An ``IOError`` while opening or writing is logged and not re-raised.
    The file is closed in every case once it has been opened.
    """
    try:
        from h5py import File
        hdf = File(self.config.data.HDFS_INTERNAL_DATA_FILENAME, "w")
    except IOError as e:
        TextProcessing.logToFile.error(
            "The internal file failed to open for write in <TextProcessing/store_h5py"
        )
        TextProcessing.logToFile.error(e)
        return

    splits = (
        ("train", x_train, y_train),
        ("val", x_val, y_val),
        ("test", x_test, y_test),
    )
    try:
        # `with hdf` guarantees the handle is closed even when a write fails;
        # previously it was only closed on the success path.
        with hdf:
            group_data = hdf.create_group("dataset")
            split_groups = {
                split: group_data.create_group(split) for split, _, _ in splits
            }
            for split, x_data, y_data in splits:
                split_groups[split].create_dataset(
                    "x_" + split + "set", data=x_data, compression="gzip")
                split_groups[split].create_dataset(
                    "y_" + split + "set", data=y_data, compression="gzip")
    except IOError as e:
        TextProcessing.logToFile.error(
            "Failed to store in hdfs in <TextProcessing/store_h5py")
        TextProcessing.logToFile.error(e)
    else:
        TextProcessing.logToFile.logger.info(
            "Successful creation of data file with in-house text processing."
        )
        TextProcessing.logToStream.logger.info(
            "Successful creation of data file with in-house text processing."
        )
def archive_astra_with_distgen(astra_object,
                               distgen_object,
                               archive_file=None,
                               astra_group='astra',
                               distgen_group='distgen'):
    """
    Creates a new archive_file (hdf5) with groups for astra and distgen.

    Calls the .archive method of the Distgen and Astra objects (in that
    order) into these groups.

    Parameters
    ----------
    astra_object, distgen_object
        Objects exposing ``.archive(h5_group)``.
    archive_file : str
        Path of the hdf5 file to create (opened in 'w' mode). The default of
        None is passed to ``File`` unchanged, so callers are expected to
        supply a path.
    astra_group, distgen_group : str
        Names of the groups the two objects are archived into.
    """
    # The context manager closes the file even if an .archive() call raises.
    with File(archive_file, 'w') as h5:
        distgen_object.archive(h5.create_group(distgen_group))
        astra_object.archive(h5.create_group(astra_group))
def write(self, h5, name=None):
    """
    Writes openPMD-beamphysics format to an open h5 handle, or new file if h5 is a str.

    Parameters
    ----------
    h5 : str or open h5 handle
        If a str, user and environment variables in it are expanded, a new
        file is created at that path (mode 'w'), initialised with
        ``pmd_field_init``, and the data is written to the group
        '/ExternalFieldPath/1/'. The file is closed before returning.
        Otherwise the data is written directly into the given handle, which
        is left open for the caller.
    name : optional
        Passed through to ``write_pmd_field``.
    """
    if isinstance(h5, str):
        fname = os.path.expandvars(os.path.expanduser(h5))
        # This function opened the file, so it must close it; previously the
        # handle was left open.
        with File(fname, 'w') as h5file:
            pmd_field_init(h5file, externalFieldPath='/ExternalFieldPath/%T/')
            g = h5file.create_group('/ExternalFieldPath/1/')
            write_pmd_field(g, self.data, name=name)
    else:
        write_pmd_field(h5, self.data, name=name)
class TestVisit(TestCase):

    """
        Feature: The .visit and .visititems methods allow iterative access to
        group and subgroup members
    """

    def setUp(self):
        """ Build a file containing a small tree of nested groups """
        self.f = File(self.mktemp(), 'w')
        self.groups = [
            'grp1', 'grp1/sg1', 'grp1/sg2',
            'grp2', 'grp2/sg1', 'grp2/sg1/ssg1',
        ]
        for path in self.groups:
            self.f.create_group(path)

    def tearDown(self):
        """ Close the test file """
        self.f.close()

    def test_visit(self):
        """ visit() reports the name of every subgroup """
        visited = []
        self.f.visit(visited.append)
        self.assertSameElements(visited, self.groups)

    def test_visititems(self):
        """ visititems() reports every subgroup together with its object """
        expected = [(path, self.f[path]) for path in self.groups]
        visited = []
        self.f.visititems(lambda name, obj: visited.append((name, obj)))
        self.assertSameElements(expected, visited)

    def test_bailout(self):
        """ A non-None callback result stops iteration and is returned """
        first = self.groups[0]

        result = self.f.visit(lambda name: name)
        self.assertEqual(result, first)

        result = self.f.visititems(lambda name, obj: (name, obj))
        self.assertEqual(result, (first, self.f[first]))
def _save_dictionary(d: dict, dict_name: str, file: h5py.File) -> None:
    """
    Saves a dictionary to hdf5 file, one dataset per key, in a new group.
    Note: Does not work for general dictionaries!

    Values whose key contains "finish_time" or "start_time" are formatted
    with ``strftime`` before being stored; all other values are stored as-is.

    :param d: The dictionary to save
    :param dict_name: The name of the dictionary (used as the group name)
    :param file: The hdf5 file to which the dictionary will be added
    """
    group = file.create_group(dict_name)
    for key, value in d.items():
        is_timestamp = "finish_time" in key or "start_time" in key
        # need to encode datetime object as string
        payload = value.strftime("%m/%d/%Y %I:%M:%S %p") if is_timestamp else value
        group.create_dataset(key, data=payload)
def _generate_hdf5_group(self, f: h5py.File = None):
    """Create this history's trace group in the hdf5 file if it is missing.

    The file ``self.file`` is opened in append mode. A newly created group
    gets zeroed counter attributes and the configured ``trace_save_iter``.
    An ``OSError`` is ignored. The ``f`` argument is not used.
    """
    counters = ('n_iterations', 'n_fval', 'n_grad', 'n_hess', 'n_res', 'n_sres')
    try:
        with h5py.File(self.file, 'a') as h5:
            trace_path = f'history/{self.id}/trace/'
            if trace_path in h5:
                # Group already present: leave its attributes untouched.
                return
            trace = h5.create_group(trace_path)
            for counter in counters:
                trace.attrs[counter] = 0
            trace.attrs['trace_save_iter'] = self.options.trace_save_iter
    except OSError:
        pass
def init_file(self, sim: 'Simulation', h5file: h5py.File) -> None:
    """Lay out the 'history' group of *h5file* and save the simulation tree.

    Creates the time axis, particle ids and coordinate names, then empty
    datasets for particle position, momentum, mass and charge, each with
    labelled dimensions and attached dimension scales. For the PIC
    interaction model a 'field/potential' dataset with time and x/y/z scales
    is added as well. Finally ``sim.tree`` is written into a new
    'simulation' group of the same file.

    :param sim: simulation whose sources, time grid and fields size the datasets
    :param h5file: open, writable hdf5 file to initialise
    """
    h = h5file.create_group('history')
    n_particles = sum(s.initial_number_of_particles +
                      s.particles_to_generate_each_step * sim.time_grid.total_nodes
                      for s in sim.particle_sources)
    n_time = (sim.time_grid.total_nodes - 1) // sim.time_grid.node_to_save + 1

    h['time'] = np.linspace(0, sim.time_grid.total_time, n_time)
    h['particles/ids'] = np.arange(n_particles)
    # np.bytes_ is the surviving name of np.string_ (removed in NumPy 2.0).
    h['particles/coordinates'] = [np.bytes_('x'), np.bytes_('y'), np.bytes_('z')]

    def label_axis(dset_path, axis, label, scale_path, scale_name):
        # Label one dimension of a dataset and attach a scale dataset to it.
        # NOTE(review): dims.create_scale is the legacy h5py call; newer h5py
        # offers Dataset.make_scale - confirm against the pinned version.
        dset = h[dset_path]
        scale = h[scale_path]
        dset.dims[axis].label = label
        dset.dims.create_scale(scale, scale_name)
        dset.dims[axis].attach_scale(scale)

    ids_axis = ('id', 'particles/ids', 'ids')
    time_axis = ('time', 'time', 'time')

    # NOTE(review): the third-axis label is 'coordinates' for position but
    # 'coordinate' for momentum; kept as-is so the file content is unchanged.
    for quantity, coord_label in (('position', 'coordinates'), ('momentum', 'coordinate')):
        dset_path = f'particles/{quantity}'
        h.create_dataset(dset_path, (n_particles, n_time, 3))
        label_axis(dset_path, 0, *ids_axis)
        label_axis(dset_path, 1, *time_axis)
        label_axis(dset_path, 2, coord_label, 'particles/coordinates', 'coordinates')

    for quantity in ('mass', 'charge'):
        dset_path = f'particles/{quantity}'
        h.create_dataset(dset_path, (n_particles,))
        label_axis(dset_path, 0, *ids_axis)

    if sim.particle_interaction_model == Model.PIC:
        h.create_dataset('field/potential', (n_time, *sim.potential.n_nodes))
        label_axis('field/potential', 0, *time_axis)
        for i, c in enumerate('xyz'):
            h[f'field/{c}'] = np.linspace(0, sim.electric_field.size[i], sim.electric_field.n_nodes[i])
            label_axis('field/potential', i + 1, c, f'field/{c}', c)

    # Write into the file that was passed in, not self.h5file, so everything
    # this method creates lands in the same file.
    tree_to_hdf5(sim.tree, h5file.create_group('simulation'))
def export_configuration_to_hdf5(cfg: Configuration, f: h5py.File, path: str = '/'):
    """Write *cfg* into the group *path* of the open hdf5 file *f*.

    The group receives the attributes 'type' (always 'Configuration'),
    'name' and 'location' (latitude, longitude and height values), plus one
    dataset per column of ``cfg.data``.

    :param cfg: configuration to export
    :param f: open, writable hdf5 file
    :param path: group to write into; it is created if missing. The default
        '/' targets the root group.
    """
    # require_group returns the group if it exists, so the default '/' works;
    # create_group fails for the root group because it is always present.
    grp = f.require_group(path)
    grp.attrs['type'] = 'Configuration'
    grp.attrs['name'] = cfg.name
    grp.attrs['location'] = [
        cfg.location.latitude.value,
        cfg.location.longitude.value,
        cfg.location.height.value
    ]
    for col in cfg.data.columns:
        c = cfg.data[col]
        # Unicode wide strings are not supported, convert to ASCII
        if c.dtype.kind == 'U':
            c = c.astype("S")
        grp.create_dataset(col, data=c)
def _write_metadata(self, handle: h5py.File, names_to_write: List):
    """
    Writes metadata to file based on the information passed to the object
    and the information in the particle groups.

    The values are written as attributes of a new "Header" group. Entries of
    ``self.extra_header``, when set, override or extend the defaults.

    Parameters
    ----------

    handle : h5py.File
        hdf5 file handle to write to

    names_to_write : list
        list of metadata fields to write
    """
    # Use one lookup table for both sizing and iteration; the original read
    # the size from `metadata.particle`, a different attribute from the
    # `metadata.particle_types` iterated below.
    name_table = metadata.particle_types.particle_name_underscores

    part_types = max(name_table.keys()) + 1
    number_of_particles = [0] * part_types
    mass_table = [0.0] * part_types

    for number, name in name_table.items():
        if name in names_to_write:
            particles = getattr(self, name)
            number_of_particles[number] = particles.n_part
            # The mass table holds the first particle's mass for each type.
            mass_table[number] = particles.masses[0]

    attrs = {
        "BoxSize": self.box_size,
        "NumPart_Total": number_of_particles,
        "NumPart_Total_HighWord": [0] * 6,
        "Flag_Entropy_ICs": 0,
        "Dimension": np.array([self.dimension]),
        # LEGACY but required for Gadget readers
        "NumFilesPerSnapshot": 1,
        "NumPart_ThisFile": number_of_particles,
        "MassTable": mass_table,
    }

    if self.extra_header is not None:
        attrs = {**attrs, **self.extra_header}

    header = handle.create_group("Header")

    for name, value in attrs.items():
        header.attrs.create(name, value)

    return
def save_metadata(self, hdf5: h5py.File, group_name: str) -> None:
    """Write this object's metadata into a new group *group_name* of *hdf5*.

    Datasets written: 'cords' (``self.cords``), 'scale names' (ASCII
    strings) and 'displacements' (one entry per file pair yielded by
    ``self._filename_iter()``). Camera, mesh, resolution and texture
    information are stored as group attributes.
    """
    group = hdf5.create_group(group_name)

    num_scales, _, _ = self.get_data_sizes()
    num_vertices_input = len(self._get_base_cords())

    # np.float (an alias of the builtin float) was removed in NumPy 1.24;
    # np.float64 is the same dtype.
    group.create_dataset('cords', (num_vertices_input, 3), dtype=np.float64, data=self.cords)

    dset_scale_names = group.create_dataset('scale names', shape=(num_scales,),
                                            dtype=h5py.string_dtype(encoding='ascii'))
    for idx, name in enumerate(self._get_scale_names()):
        dset_scale_names[idx] = name

    dset_displacements = group.create_dataset('displacements', (len(self), num_vertices_input, 3),
                                              dtype=np.float64)
    for idx, files in enumerate(self._filename_iter()):
        # Only the first file of each pair is read here.
        point_file, _ = files
        dset_displacements[idx] = self._get_disps_from_csv(point_file, num_vertices_input)

    group.attrs['cameras'] = self.cameras
    group.attrs['mesh_wing_path'] = self.mesh_wing_path.name
    group.attrs['mesh_tip_path'] = self.mesh_tip_path.name
    group.attrs['resolution'] = self.resolution
    group.attrs['texture'] = self.texture_path.name
def save_model_state_dict(hf: h5py.File, state_dict: ModuleStateDict) -> None:
    """Store every tensor of *state_dict* under its public key.

    A group ``MODEL_STATE_DICT_GROUP`` is created with ``track_order=True``.
    Each private key is translated by the first entry of
    ``MODEL_STATE_DICT_MAPPINGS`` that does not raise ``ValueError``. The
    tensor is written as a dataset under the public key and the private key
    is kept in the dataset attribute ``STATE_DICT_KEY_ATTR``.

    Raises ``RuntimeError`` if a value is not a ``torch.Tensor`` or if no
    mapping matches a key.
    """
    def to_public(private_key):
        # First mapping that accepts the key wins.
        for mapping in MODEL_STATE_DICT_MAPPINGS:
            try:
                return mapping.private_to_public.map(private_key)
            except ValueError:
                continue
        raise RuntimeError("Couldn't find a match for state dict key: %s"
                           % private_key)

    group = hf.create_group(MODEL_STATE_DICT_GROUP, track_order=True)
    for private_key, tensor in state_dict.items():
        if not isinstance(tensor, torch.Tensor):
            raise RuntimeError("Isn't the state dict supposed to be "
                               "a shallow key-to-tensor mapping?!")
        public_key = to_public(private_key)
        stored = group.create_dataset(public_key, data=tensor.numpy())
        stored.attrs[STATE_DICT_KEY_ATTR] = private_key
def save_to_hdf5(f: h5py.File, iteration, time, dt, r, p, m, q):
    """Write one iteration of particle data into the open file *f*.

    The iteration group name comes from ``f.attrs["iterationFormat"]`` and
    the particle group from ``f.attrs["particlesPath"]``. Per direction
    (x, y, z) a position, a zero positionOffset and a momentum dataset are
    written; charge and mass are taken from the first column of *q* and *m*.
    Every dataset gets ``unitSI``, ``unitDimension`` and ``timeOffset``
    attributes.
    """
    # TODO use OpenPMD for saving instead of hdf5?
    n_rows = r.shape[0]

    iteration_group = f.create_group(f.attrs["iterationFormat"].format(iteration))
    iteration_group.attrs["time"] = time
    iteration_group.attrs["dt"] = dt
    iteration_group.attrs["timeUnitSI"] = time  # TODO decide on something

    particles = iteration_group.create_group(f.attrs["particlesPath"] + b"particles")

    # Seven-entry unit-dimension vectors written verbatim as attributes.
    openPMD_positions = np.array([1] + [0] * 6, dtype=float)
    openPMD_momentum = np.array([1, 1, -1, 0, 0, 0, 0], dtype=float)
    openPMD_charge = np.array([0, 0, 1, 1, 0, 0, 0], dtype=float)
    openPMD_mass = np.array([0, 1, 0, 0, 0, 0, 0], dtype=float)

    def write_record(path, values, unit_dimension):
        # Create the dataset and stamp the three standard attributes on it.
        record = particles.create_dataset(path, data=values)
        record.attrs["unitSI"] = 1.0
        record.attrs["unitDimension"] = unit_dimension
        record.attrs["timeOffset"] = 0.0

    for axis, direction in enumerate("xyz"):
        write_record(f"position/{direction}", r[:, axis], openPMD_positions)
        write_record(f"positionOffset/{direction}", np.zeros(n_rows), openPMD_positions)
        write_record(f"momentum/{direction}", p[:, axis], openPMD_momentum)

    write_record("charge", q[:, 0], openPMD_charge)
    write_record("mass", m[:, 0], openPMD_mass)
def write_particle_group(self, file_handle: h5py.File, compress: bool):
    """
    Writes the particle group's required properties to file.

    A group named ``self.particle_handle`` is created. Each required field
    listed in ``metadata.required_fields`` for ``self.particle_name`` is
    written as a dataset, gzip-compressed when *compress* is true.
    """
    target = file_handle.create_group(self.particle_handle)
    compression = "gzip" if compress else None

    required = getattr(metadata.required_fields, self.particle_name)
    for attribute_name, dataset_name in required.items():
        target.create_dataset(
            dataset_name,
            data=getattr(self, attribute_name),
            compression=compression,
        )
def save_to_hdf5(f: h5py.File, iteration, time, dt, r, p, m):
    """Write one iteration of particle data into the open file *f*.

    The iteration group name comes from ``f.attrs["iterationFormat"]`` and
    the particle group from ``f.attrs["particlesPath"]``. Per direction
    (x, y, z) a position, a zero positionOffset and a momentum dataset are
    written, followed by an "id" dataset (0 .. m.size - 1) and the mass.
    All arrays pass through ``to_numpy`` first. Every dataset except "id"
    gets ``unitSI``, ``unitDimension`` and ``timeOffset`` attributes.
    """
    # TODO use OpenPMD for saving instead of hdf5?
    n_rows = r.shape[0]

    iteration_group = f.create_group(f.attrs["iterationFormat"].format(iteration))
    iteration_group.attrs["time"] = time
    iteration_group.attrs["dt"] = dt
    iteration_group.attrs["timeUnitSI"] = time  # TODO decide on something

    particles = iteration_group.create_group(f.attrs["particlesPath"] + b"particles")

    # Seven-entry unit-dimension vectors written verbatim as attributes.
    openPMD_positions = np.array([1] + [0] * 6, dtype=float)
    openPMD_momentum = np.array([1, 1, -1, 0, 0, 0, 0], dtype=float)
    openPMD_charge = np.array([0, 0, 1, 1, 0, 0, 0], dtype=float)
    openPMD_mass = np.array([0, 1, 0, 0, 0, 0, 0], dtype=float)

    def write_record(path, values, unit_dimension):
        # Convert, create the dataset, then stamp the standard attributes.
        record = particles.create_dataset(path, data=to_numpy(values))
        record.attrs["unitSI"] = 1.0
        record.attrs["unitDimension"] = unit_dimension
        record.attrs["timeOffset"] = 0.0

    for axis, direction in enumerate("xyz"):
        write_record(f"position/{direction}", r[:, axis], openPMD_positions)
        write_record(f"positionOffset/{direction}", np.zeros(n_rows), openPMD_positions)
        write_record(f"momentum/{direction}", p[:, axis], openPMD_momentum)

    # The id dataset carries no unit attributes.
    particles.create_dataset("id", data=to_numpy(np.arange(m.size)))
    write_record("mass", m, openPMD_mass)
def save_cosmology(handle: h5py.File, cosmology: Cosmology):
    """
    Save the (astropy) cosmology as attributes of a new "cosmology" group.

    Parameters
    ----------

    handle: h5py.File
        h5py file handle to save the cosmology to. This is performed by
        creating a cosmology group and setting attributes.

    cosmology: astropy.cosmology.Cosmology
        The Astropy cosmology instance to save to the HDF5 file. This is
        performed by extracting all of the key variables and saving them
        as either floating point numbers or strings.

    Notes
    -----

    ``m_nu``, ``Ob0`` and ``name`` fall back to 0.0, 0.0 and "" when they are
    None. ``w0`` is only written when the cosmology has that attribute.

    This process can be reversed by using load_cosmology.
    """
    attrs = handle.create_group("cosmology").attrs

    attrs.create("H0", cosmology.H0)
    attrs.create("Om0", cosmology.Om0)
    attrs.create("Ode0", cosmology.Ode0)
    attrs.create("Tcmb0", cosmology.Tcmb0)
    attrs.create("Neff", cosmology.Neff)

    has_m_nu = cosmology.m_nu is not None
    attrs.create("m_nu", cosmology.m_nu if has_m_nu else 0.0)
    attrs.create("m_nu_units", str(cosmology.m_nu.unit if has_m_nu else ""))
    attrs.create("Ob0", cosmology.Ob0 if cosmology.Ob0 is not None else 0.0)
    attrs.create("name", cosmology.name if cosmology.name is not None else "")

    # Only a missing attribute means "no equation of state". Catch just that
    # rather than everything; a bare except also hides KeyboardInterrupt and
    # genuine write failures.
    try:
        w0 = cosmology.w0
    except AttributeError:
        # No EoS!
        pass
    else:
        attrs.create("w0", w0)

    return
def _save_hdf5(self, file: h5py.File):
    """
    Actual implementation of HDF5 saving.

    The skeleton is serialised with ``self.to_json()`` and stored as a byte
    string in an attribute, named ``self.name``, of the "skeleton" group.
    The group is created with ``track_order=True`` if it does not exist.

    Args:
        file: The open h5py.File to write the skeleton data to.

    Returns:
        None
    """

    # All skeleton will be put as sub-groups in the skeleton group
    if "skeleton" not in file:
        all_sk_group = file.create_group("skeleton", track_order=True)
    else:
        all_sk_group = file.require_group("skeleton")

    # Write the dataset to JSON string, then store it in a string
    # attribute. np.bytes_ is the surviving name of np.string_, which was
    # removed in NumPy 2.0; the stored value is the same.
    all_sk_group.attrs[self.name] = np.bytes_(self.to_json())
def _write_units(self, handle: h5py.File):
    """
    Writes the unit information to file, as attributes of a new "Units" group.

    Note that we do not have support for unit current yet; it is written
    as 1.0.

    Parameters
    ----------

    handle : h5py.File
        hdf5 file to write units to
    """
    dim = unyt.dimensions
    cgs_base = unyt.unit_systems.cgs_unit_system.base_units
    own_base = self.unit_system.base_units

    def to_cgs(dimension):
        # We need to find the correct unit (which is now stored as a sympy value,
        # why?!) and convert it to an unyt unit.
        source = unyt.unit_object.Unit(own_base[dimension])
        target = unyt.unit_object.Unit(cgs_base[dimension])
        factor = source.get_conversion_factor(target)[0]

        # We use the array because this is how swift outputs it, as a length
        # 1 array (rather than as a single float).
        return np.array([factor])

    unit_attrs = {
        "Unit mass in cgs (U_M)": to_cgs(dim.mass),
        "Unit length in cgs (U_L)": to_cgs(dim.length),
        "Unit time in cgs (U_t)": to_cgs(dim.time),
        "Unit current in cgs (U_I)": np.array([1.0]),
        "Unit temperature in cgs (U_T)": to_cgs(dim.temperature),
    }

    units_group = handle.create_group("Units")
    for attr_name, attr_value in unit_attrs.items():
        units_group.attrs.create(attr_name, attr_value)
class TestExternalLinks(TestCase):

    """
        Feature: Create and manage external links
    """

    def setUp(self):
        """ Open the main file and prepare a closed external file with one group """
        self.f = File(self.mktemp(), 'w')
        self.ename = self.mktemp()
        self.ef = File(self.ename, 'w')
        self.ef.create_group('external')
        self.ef.close()

    def tearDown(self):
        """ Close whichever of the two files is still open """
        for handle in (self.f, self.ef):
            if handle:
                handle.close()

    def test_epath(self):
        """ An ExternalLink exposes its filename and path """
        link = ExternalLink('foo.hdf5', '/foo')
        self.assertEqual(link.filename, 'foo.hdf5')
        self.assertEqual(link.path, '/foo')

    def test_erepr(self):
        """ repr() of an ExternalLink is a string """
        link = ExternalLink('foo.hdf5', '/foo')
        self.assertIsInstance(repr(link), six.string_types)

    def test_create(self):
        """ Creating external links """
        self.f['ext'] = ExternalLink(self.ename, '/external')
        linked = self.f['ext']
        # Keep the external file on self so tearDown closes it.
        self.ef = linked.file
        self.assertNotEqual(self.ef, self.f)
        self.assertEqual(linked.name, '/external')

    def test_exc(self):
        """ KeyError raised when attempting to open broken link """
        self.f['ext'] = ExternalLink(self.ename, '/missing')
        with self.assertRaises(KeyError):
            self.f['ext']

    # I would prefer IOError but there's no way to fix this as the exception
    # class is determined by HDF5.
    def test_exc_missingfile(self):
        """ KeyError raised when attempting to open missing file """
        self.f['ext'] = ExternalLink('mongoose.hdf5', '/foo')
        with self.assertRaises(KeyError):
            self.f['ext']

    def test_close_file(self):
        """ Files opened by accessing external links can be closed

        Issue 189.
        """
        self.f['ext'] = ExternalLink(self.ename, '/')
        external_file = self.f['ext'].file
        external_file.close()
        self.assertFalse(external_file)

    @ut.skipIf(NO_FS_UNICODE, "No unicode filename support")
    def test_unicode_encode(self):
        """
        Check that external links encode unicode filenames properly
        Testing issue #732
        """
        target_path = os.path.join(mkdtemp(), u"α.hdf5")
        with File(target_path, "w") as target:
            target.create_group('external')
        self.f['ext'] = ExternalLink(target_path, '/external')

    @ut.skipIf(NO_FS_UNICODE, "No unicode filename support")
    def test_unicode_decode(self):
        """
        Check that external links decode unicode filenames properly
        Testing issue #732
        """
        target_path = os.path.join(mkdtemp(), u"α.hdf5")
        with File(target_path, "w") as target:
            target.create_group('external')
            target["external"].attrs["ext_attr"] = "test"
        self.f['ext'] = ExternalLink(target_path, '/external')
        self.assertEqual(self.f["ext"].attrs["ext_attr"], "test")

    def test_unicode_hdf5_path(self):
        """
        Check that external links handle unicode hdf5 paths properly
        Testing issue #333
        """
        target_path = os.path.join(mkdtemp(), "external.hdf5")
        with File(target_path, "w") as target:
            target.create_group(u'α')
            target[u"α"].attrs["ext_attr"] = "test"
        self.f['ext'] = ExternalLink(target_path, u'/α')
        self.assertEqual(self.f["ext"].attrs["ext_attr"], "test")
def make_nuc(ncc_file_path, n3d_file_path, out_file_name):
    """
    Build a .nuc HDF5 file from an NCC contact file and an N3D coordinate file.

    ncc_file_path : path handed to import_contacts(); its base name (without
                    extension) names the contact group under contacts/working.
    n3d_file_path : path handed to import_coords(), which yields per-chromosome
                    positions and coordinates.
    out_file_name : output path; '.nuc' is appended unless the name already
                    ends with it (case-insensitive).

    The output file is created in overwrite mode and is closed before the
    function returns, including when an error occurs while writing.
    Returns None.
    """
    if not out_file_name.lower().endswith('.nuc'):
        out_file_name = out_file_name + '.nuc'

    contact_dict = import_contacts(ncc_file_path)
    contact_name = os.path.splitext(os.path.basename(ncc_file_path))[0]
    pos_dict, coords_dict = import_coords(n3d_file_path)

    # (parent group, child groups) pairs; every children entry is a real tuple
    # so the loop below iterates over names, never over characters.
    hierarchy = (('contacts', ('original', 'working')),
                 ('display', ()),
                 ('chromosomes', ()),
                 ('dataTracks', ('derived', 'external', 'innate')),
                 ('sample', ('protocol', 'organism', 'tissue')),
                 ('structures', ('0',)),
                 ('images', ()))

    # The context manager flushes and closes the file even if a write fails.
    with File(out_file_name, mode='w') as root:
        for parent, children in hierarchy:
            group = root.create_group(parent)
            for child in children:
                group.create_group(child)

        structure = root['structures']['0']
        for child in ('particles', 'restraints', 'transforms', 'coords'):
            structure.create_group(child)

        # File id: a time-seeded random number plus the creation time (twice).
        now = int(time.time())
        random.seed(now)
        root.attrs['id'] = np.array([random.random(), now, now], np.float32)

        # np.bytes_ is the same type as the np.string_ alias removed in NumPy 2.
        root['sample'].attrs['name'] = np.bytes_('Unknown')

        contact_group = root['contacts']['working'].create_group(contact_name)
        for chromoPair in contact_dict:
            chrA, chrB = chromoPair
            # One sub-group per first chromosome, one dataset per partner.
            chr_group = contact_group.require_group(chrA)
            chr_group.create_dataset(chrB, dtype=np.uint32,
                                     data=contact_dict[chromoPair].T)

        coords_group = structure['coords']
        particle_group = structure['particles']
        for chromo in coords_dict:
            coords_group.create_dataset(chromo, dtype=np.float64,
                                        data=coords_dict[chromo])

            pos = np.array(pos_dict[chromo], np.uint32)
            group = particle_group.create_group(chromo)
            group.create_dataset('positions', dtype=np.uint32, data=pos)

            # Record the extent covered by this chromosome's particles.
            chromo_group = root['chromosomes'].create_group(chromo)
            chromo_group.attrs['limits'] = np.array([pos.min(), pos.max()])
from itertools import product

# Script fragment (Python 2): copies an HDF5 file, halves its stored timestep
# and creates double-sized datasets for every entry of the "prim" group.
# Positional arguments: args[0] is the input file, args[1] the output file.
parser = OptionParser()
opts, args = parser.parse_args()

# NOTE(review): CopyH5File is defined elsewhere; presumably it copies
# everything except the 'prim' group into the destination - confirm.
CopyH5File(src=args[0], dst=args[1], skip='prim')
h5filei = File(args[0], 'r')
h5fileo = File(args[1], 'a')

# Replace the stored timestep with half its value (delete, then re-create).
# NOTE(review): Dataset.value is deprecated in h5py and removed in h5py 3;
# dataset[()] is the documented replacement - confirm the targeted version.
new_dt = h5fileo['status/Timestep'].value / 2.0
del h5fileo['status/Timestep']
h5fileo['status/Timestep'] = new_dt

primi = h5filei["prim"]
primo = h5fileo.create_group("prim")

for dset in primi:
    print "Working on data set", dset
    datai = primi[dset]
    # Fall back to a 16x16x16 chunk when the input dataset is not chunked.
    if not datai.chunks:
        chnksize = array([16,16,16])
    else:
        chnksize = array(datai.chunks)
    globsize = array(datai.shape)
    # NOTE(review): numchunk is not used in the visible part of the script;
    # with integer arrays under Python 2 this '/' is a floor division.
    numchunk = globsize/chnksize
    # Output dataset has twice the extent and twice the chunk size per axis.
    datao = primo.create_dataset(dset, globsize*2, chunks=tuple(chnksize*2), dtype='f8')
    print "Using chunk size", datao.chunks
class TestCopy(TestCase):

    """
        Feature: Groups and datasets can be copied within and between files
    """

    def setUp(self):
        self.f1 = File(self.mktemp(), 'w')
        self.f2 = File(self.mktemp(), 'w')

    def tearDown(self):
        # test_copy_external_links closes f1 and sets it to None, hence the
        # truthiness guards (a closed File is falsy as well).
        if self.f1:
            self.f1.close()
        if self.f2:
            self.f2.close()

    @ut.skipIf(h5py.version.hdf5_version_tuple < (1,8,9),
               "Bug in HDF5<1.8.8 prevents copying open dataset")
    def test_copy_path_to_path(self):
        """ Copy a group, source and destination both given as paths """
        foo = self.f1.create_group('foo')
        foo['bar'] = [1,2,3]

        self.f1.copy('foo', 'baz')
        baz = self.f1['baz']
        self.assertIsInstance(baz, Group)
        self.assertArrayEqual(baz['bar'], np.array([1,2,3]))

    @ut.skipIf(h5py.version.hdf5_version_tuple < (1,8,9),
               "Bug in HDF5<1.8.8 prevents copying open dataset")
    def test_copy_path_to_group(self):
        """ Copy a group given as a path into a destination Group object """
        foo = self.f1.create_group('foo')
        foo['bar'] = [1,2,3]
        baz = self.f1.create_group('baz')

        self.f1.copy('foo', baz)
        baz = self.f1['baz']
        self.assertIsInstance(baz, Group)
        self.assertArrayEqual(baz['foo/bar'], np.array([1,2,3]))

        # Same copy, but into the root group of a second file.
        self.f1.copy('foo', self.f2['/'])
        self.assertIsInstance(self.f2['/foo'], Group)
        self.assertArrayEqual(self.f2['foo/bar'], np.array([1,2,3]))

    @ut.skipIf(h5py.version.hdf5_version_tuple < (1,8,9),
               "Bug in HDF5<1.8.8 prevents copying open dataset")
    def test_copy_group_to_path(self):
        """ Copy a Group object to a destination given as a path """
        foo = self.f1.create_group('foo')
        foo['bar'] = [1,2,3]

        self.f1.copy(foo, 'baz')
        baz = self.f1['baz']
        self.assertIsInstance(baz, Group)
        self.assertArrayEqual(baz['bar'], np.array([1,2,3]))

        # Copy the same source group into a second file.
        self.f2.copy(foo, 'foo')
        self.assertIsInstance(self.f2['/foo'], Group)
        self.assertArrayEqual(self.f2['foo/bar'], np.array([1,2,3]))

    @ut.skipIf(h5py.version.hdf5_version_tuple < (1,8,9),
               "Bug in HDF5<1.8.8 prevents copying open dataset")
    def test_copy_group_to_group(self):
        """ Copy a Group object into a destination Group object """
        foo = self.f1.create_group('foo')
        foo['bar'] = [1,2,3]
        baz = self.f1.create_group('baz')

        self.f1.copy(foo, baz)
        baz = self.f1['baz']
        self.assertIsInstance(baz, Group)
        self.assertArrayEqual(baz['foo/bar'], np.array([1,2,3]))

        self.f1.copy(foo, self.f2['/'])
        self.assertIsInstance(self.f2['/foo'], Group)
        self.assertArrayEqual(self.f2['foo/bar'], np.array([1,2,3]))

    @ut.skipIf(h5py.version.hdf5_version_tuple < (1,8,9),
               "Bug in HDF5<1.8.8 prevents copying open dataset")
    def test_copy_dataset(self):
        """ Copy a dataset by object and by path, within and across files """
        self.f1['foo'] = [1,2,3]
        foo = self.f1['foo']

        self.f1.copy(foo, 'bar')
        self.assertArrayEqual(self.f1['bar'], np.array([1,2,3]))

        self.f1.copy('foo', 'baz')
        self.assertArrayEqual(self.f1['baz'], np.array([1,2,3]))

        self.f1.copy('foo', self.f2)
        self.assertArrayEqual(self.f2['foo'], np.array([1,2,3]))

        # Destination group plus an explicit name for the copy.
        self.f2.copy(self.f1['foo'], self.f2, 'bar')
        self.assertArrayEqual(self.f2['bar'], np.array([1,2,3]))

    @ut.skipIf(h5py.version.hdf5_version_tuple < (1,8,9),
               "Bug in HDF5<1.8.8 prevents copying open dataset")
    def test_copy_shallow(self):
        """ shallow=True copies immediate members only; subgroups stay empty """
        foo = self.f1.create_group('foo')
        bar = foo.create_group('bar')
        foo['qux'] = [1,2,3]
        bar['quux'] = [4,5,6]

        self.f1.copy(foo, 'baz', shallow=True)
        baz = self.f1['baz']
        self.assertIsInstance(baz, Group)
        self.assertIsInstance(baz['bar'], Group)
        self.assertEqual(len(baz['bar']), 0)
        self.assertArrayEqual(baz['qux'], np.array([1,2,3]))

        self.f2.copy(foo, 'foo', shallow=True)
        self.assertIsInstance(self.f2['/foo'], Group)
        self.assertIsInstance(self.f2['foo/bar'], Group)
        self.assertEqual(len(self.f2['foo/bar']), 0)
        self.assertArrayEqual(self.f2['foo/qux'], np.array([1,2,3]))

    @ut.skipIf(h5py.version.hdf5_version_tuple < (1,8,9),
               "Bug in HDF5<1.8.8 prevents copying open dataset")
    def test_copy_without_attributes(self):
        """ without_attrs=True copies the data but not the attributes """
        self.f1['foo'] = [1,2,3]
        foo = self.f1['foo']
        foo.attrs['bar'] = [4,5,6]

        self.f1.copy(foo, 'baz', without_attrs=True)
        self.assertArrayEqual(self.f1['baz'], np.array([1,2,3]))
        # assertNotIn replaces the deprecated assert_ alias (gone in 3.12).
        self.assertNotIn('bar', self.f1['baz'].attrs)

        self.f2.copy(foo, 'baz', without_attrs=True)
        self.assertArrayEqual(self.f2['baz'], np.array([1,2,3]))
        self.assertNotIn('bar', self.f2['baz'].attrs)

    @ut.skipIf(h5py.version.hdf5_version_tuple < (1,8,9),
               "Bug in HDF5<1.8.8 prevents copying open dataset")
    def test_copy_soft_links(self):
        """ expand_soft=True copies the link target instead of the link """
        self.f1['bar'] = [1,2,3]
        foo = self.f1.create_group('foo')
        foo['baz'] = SoftLink('/bar')

        self.f1.copy(foo, 'qux', expand_soft=True)
        self.f2.copy(foo, 'foo', expand_soft=True)
        # Remove the original target: the copies must hold their own data.
        del self.f1['bar']

        self.assertIsInstance(self.f1['qux'], Group)
        self.assertArrayEqual(self.f1['qux/baz'], np.array([1,2,3]))

        self.assertIsInstance(self.f2['/foo'], Group)
        self.assertArrayEqual(self.f2['foo/baz'], np.array([1,2,3]))

    @ut.skipIf(h5py.version.hdf5_version_tuple < (1,8,9),
               "Bug in HDF5<1.8.8 prevents copying open dataset")
    def test_copy_external_links(self):
        """ expand_external=True copies the data behind an external link """
        filename = self.f1.filename
        self.f1['foo'] = [1,2,3]
        self.f2['bar'] = ExternalLink(filename, 'foo')
        self.f1.close()
        self.f1 = None

        self.assertArrayEqual(self.f2['bar'], np.array([1,2,3]))

        self.f2.copy('bar', 'baz', expand_external=True)
        # Delete the linked file: the expanded copy must not depend on it.
        os.unlink(filename)
        self.assertArrayEqual(self.f2['baz'], np.array([1,2,3]))

    @ut.skipIf(h5py.version.hdf5_version_tuple < (1,8,9),
               "Bug in HDF5<1.8.8 prevents copying open dataset")
    def test_copy_refs(self):
        """ expand_refs=True copies objects pointed to by references """
        self.f1['foo'] = [1,2,3]
        self.f1['bar'] = [4,5,6]
        foo = self.f1['foo']
        bar = self.f1['bar']
        foo.attrs['bar'] = bar.ref

        self.f1.copy(foo, 'baz', expand_refs=True)
        self.assertArrayEqual(self.f1['baz'], np.array([1,2,3]))
        baz_bar = self.f1['baz'].attrs['bar']
        self.assertArrayEqual(self.f1[baz_bar], np.array([4,5,6]))
        # The reference points to a copy of bar, not to bar itself.
        self.assertNotEqual(self.f1[baz_bar].name, bar.name)

        self.f1.copy('foo', self.f2, 'baz', expand_refs=True)
        self.assertArrayEqual(self.f2['baz'], np.array([1,2,3]))
        baz_bar = self.f2['baz'].attrs['bar']
        self.assertArrayEqual(self.f2[baz_bar], np.array([4,5,6]))

        self.f1.copy('/', self.f2, 'root', expand_refs=True)
        self.assertArrayEqual(self.f2['root/foo'], np.array([1,2,3]))
        self.assertArrayEqual(self.f2['root/bar'], np.array([4,5,6]))
        foo_bar = self.f2['root/foo'].attrs['bar']
        self.assertArrayEqual(self.f2[foo_bar], np.array([4,5,6]))
        # There's only one copy of bar, which the reference points to.
        self.assertEqual(self.f2[foo_bar], self.f2['root/bar'])
The input directory must contain the MCSModel*.txt files.

./tlm2h5.py <inputdir> <outfile.h5> [grpname]
"""

import sys
from os.path import join

import numpy as np
# NOTE(review): loadtxt is a NumPy function re-exported by older SciPy
# releases; newer SciPy may no longer provide it - confirm.
from scipy import loadtxt
from h5py import File

# argv[1]: directory holding the text files; argv[2]: output HDF5 file.
basedir=sys.argv[1]

# NOTE(review): no mode is passed to File; the default mode depends on the
# h5py version - confirm the file is actually opened writable.
H5=File(sys.argv[2])
# Write into the group named by the optional third argument, else the root.
HG=H5.create_group(sys.argv[3]) if len(sys.argv)>3 else H5

H5.attrs['sim_type']='MomentMatrix'

V=loadtxt(join(basedir, 'MCSModelCenVec.txt'))

# First column is stored as the 'pos' dataset and reused as the reference
# for the consistency check below.
S=V[:,0]
HG.create_dataset('pos', data=S)

# All remaining columns of V fill every column of D except the last one.
# presumably V has 7 columns (position + 6 values) - TODO confirm
D=HG.create_dataset('moment0_env', dtype=V.dtype, shape=(V.shape[0], 7))
D[:,:-1] = V[:,1:]

V=loadtxt(join(basedir, 'MCSModelRmsVec.txt'))
# Warn (without aborting) when the first column differs from 'pos'.
if not np.all(V[:,0]==S):
    print 'S positions inconsistent'
class TestAdditionalMappingFuncs(BaseMapping):
    """
    Feature: Groups offer the remaining dict-style methods
    (pop, popitem, clear, update, setdefault).
    """

    def setUp(self):
        self.f = File(self.mktemp(), 'w')
        # Four sibling groups a..d under /test give the tests items to remove.
        for leaf in ('a', 'b', 'c', 'd'):
            self.f.create_group('/test/' + leaf)
        self.group = self.f['test']

    def tearDown(self):
        handle = self.f
        if handle:
            handle.close()

    def test_pop_item(self):
        """.popitem removes the item it returns"""
        name, _ = self.group.popitem()
        self.assertNotIn(name, self.group)

    def test_pop(self):
        """.pop removes the named item"""
        target = 'a'
        self.group.pop(target)
        self.assertNotIn(target, self.group)

    def test_pop_default(self):
        """.pop returns the default for a missing name"""
        # 'e' was never created in setUp
        result = self.group.pop('e', None)
        self.assertEqual(result, None)

    def test_pop_raises(self):
        """.pop without default raises KeyError for a missing name"""
        # 'e' was never created in setUp
        with self.assertRaises(KeyError):
            self.group.pop('e')

    def test_clear(self):
        """.clear leaves the group empty"""
        self.group.clear()
        remaining = len(self.group)
        self.assertEqual(remaining, 0)

    def test_update_dict(self):
        """.update accepts a dict"""
        additions = {'e': np.array([42])}
        self.group.update(additions)
        self.assertIn('e', self.group)

    def test_update_iter(self):
        """.update accepts a list of (name, value) pairs"""
        additions = [('e', np.array([42])), ('f', np.array([42]))]
        self.group.update(additions)
        self.assertIn('e', self.group)

    def test_update_kwargs(self):
        """.update accepts keyword arguments"""
        additions = {'e': np.array([42])}
        self.group.update(**additions)
        self.assertIn('e', self.group)

    def test_setdefault(self):
        """.setdefault returns the existing item"""
        found = self.group.setdefault('a')
        expected = self.group.get('a')
        self.assertEqual(found, expected)

    def test_setdefault_with_default(self):
        """.setdefault stores and returns the default for a missing name"""
        # 'e' was never created in setUp
        # 42 used as groups should be strings
        default = np.array([42])
        stored = self.group.setdefault('e', default)
        self.assertEqual(stored, 42)

    def test_setdefault_no_default(self):
        """
        .setdefault on a missing name without a default would store None;
        None is not valid dataset data, so a TypeError is expected.
        """
        # 'e' was never created in setUp
        with self.assertRaises(TypeError):
            self.group.setdefault('e')
def test_closed_file(self):
    """ Creating a group on a closed file raises ValueError """
    handle = File(self.mktemp(), 'w')
    handle.close()
    # Callable form of assertRaises; same check as the context-manager form.
    self.assertRaises(ValueError, handle.create_group, 'foo')