class TestSimpleSlicing(TestCase):

    """
        Feature: Simple NumPy-style slices (start:stop:step) are supported.
    """

    def setUp(self):
        """Create a scratch file with a 1D dataset 'x' mirroring ``self.arr``."""
        self.f = File(self.mktemp(), 'w')
        self.arr = np.arange(10)
        self.dset = self.f.create_dataset('x', data=self.arr)

    def tearDown(self):
        """Close the scratch file if it is still open."""
        if not self.f:
            return
        self.f.close()

    def test_negative_stop(self):
        """ Negative stop indexes work as they do in NumPy """
        from_dataset = self.dset[2:-2]
        from_array = self.arr[2:-2]
        self.assertArrayEqual(from_dataset, from_array)

    def test_write(self):
        """Assigning to a 1D slice of a 2D dataset
        """
        target = self.f.create_dataset('x2', (10, 2))
        column = np.zeros((10, 1))
        # A 1D source fits a 1D slice of the dataset ...
        target[:, 0] = column[:, 0]
        # ... whereas the (10, 1) source is expected to be rejected.
        with self.assertRaises(TypeError):
            target[:, 1] = column
def dump( histogram, filename = None, pathinfile = '/', mode = 'c', fs = None, compression = 'lzf'):
    '''dump( histogram, hdf_filename, path_in_hdf_file, mode ) -> save histogram into a hdf file.

    histogram:
      The histogram to be written
    filename:
      The hdf filename in which the histogram will be saved. Only used
      when ``fs`` is None.
    pathinfile:
      Accepted for backward compatibility. This function does not use it;
      the histogram is handed to ``Renderer`` as is.
    mode:
      Only used when ``fs`` is None. Selects the h5py mode used to open
      ``filename``:
      'c': opened with h5py mode 'w' (create the file, truncating an
           existing one).
      'w': opened with h5py mode 'a' (read/write an existing file, create
           it otherwise).
      Any other value raises KeyError.
    fs:
      An already opened h5py file. When given it is written to and left
      open; closing it is the caller's responsibility.
    compression:
      Forwarded unchanged to ``Renderer`` (default 'lzf').
      NOTE(review): earlier documentation described an integer 0-9 here;
      confirm which values ``Renderer`` accepts.
    '''
    from Renderer import Renderer

    if fs is not None:
        # The caller owns this handle, so it is not closed here.
        Renderer(fs, compression).render(histogram)
        return

    from h5py import File
    writeCodes = {'c':'w','w':'a'}
    fs = File(filename, writeCodes[mode])
    try:
        Renderer(fs, compression).render(histogram)
    finally:
        # Always release the file we opened, even if rendering fails.
        fs.close()
class BaseDataset(TestCase):
    """Fixture base class: opens a scratch file as ``self.f`` for every test."""

    def setUp(self):
        scratch_path = self.mktemp()
        self.f = File(scratch_path, 'w')

    def tearDown(self):
        # A closed (or missing) file is falsy; nothing to release then.
        if not self.f:
            return
        self.f.close()
def test_blocksize(self):
    """ Core driver supports variable block size """
    fname = self.mktemp()
    fid = File(fname, 'w', driver='core', block_size=1024,
               backing_store=False)
    try:
        self.assertTrue(fid)
    finally:
        # Close even when the assertion fails so the handle is not leaked.
        fid.close()
def __init__(self, fname):
    """Load the run log stored as JSON in the HDF5 file ``fname``.

    The "measure" dataset is parsed as a JSON object whose values are log
    entries; they are sorted by ``entry["Status"]["Iteration"]`` and each
    tracked quantity is exposed as a NumPy array with one element per
    entry. The "runargs" dataset is parsed as JSON and kept as
    ``self.runargs``; ``N`` is required, ``zeta`` defaults to 1.0.
    """
    from h5py import File
    from numpy import array
    import json

    # `Dataset.value` was removed in h5py 3; `[()]` reads the whole dataset
    # on both old and new releases. The context manager closes the file
    # even when parsing fails.
    with File(fname, "r") as h5f:
        measure = json.loads(h5f["measure"][()])
        runargs = json.loads(h5f["runargs"][()])

    log = sorted(measure.values(), key=lambda e: e["Status"]["Iteration"])

    def series(*keys):
        # Follow the key path `keys` into every log entry, in log order.
        values = []
        for entry in log:
            item = entry
            for key in keys:
                item = item[key]
            values.append(item)
        return array(values)

    self.N = runargs["N"]
    self.zeta = runargs.get("zeta", 1.0)
    self.time = series("Status", "CurrentTime")
    self.mean_T = series("mean_T")
    self.max_T = series("max_T")
    self.mean_Ms = series("mean_Ms")
    self.max_Ms = series("max_Ms")
    self.mean_Ma = series("mean_Ma")
    self.min_Ma = series("min_Ma")
    self.kin = series("energies", "kinetic")
    self.tie = series("energies", "internal")
    self.mag = series("energies", "magnetic")
    self.tot = series("energies", "total")
    # First component of the "mean_velocity" entry.
    self.mean_gamma = series("mean_velocity", 0)
    self.max_gamma = series("max_lorentz_factor")
    self.runargs = runargs
class BasH5(object):
    """
    This class is deprecated.

    .. deprecated:: 0.3.0
        Use `BasH5Reader` instead.

    """
    def __init__(self, filename, readType='Raw'):
        """Open ``filename`` read-only and set up its region table and data groups.

        ``readType`` selects the primary base-call group: 'Raw' uses
        '/PulseData/BaseCalls', 'CCS' uses '/PulseData/ConsensusBaseCalls'
        (and additionally exposes the raw group as ``rbaseCallsDG``). Any
        other value leaves ``baseCallsDG`` as None.
        """
        self._h5f = File(filename, 'r')
        self.rgnTable = RegionTable(self._h5f)
        self.baseCallsDG = None
        # Defined for every readType so the attribute always exists.
        self.rbaseCallsDG = None
        if readType == 'Raw':
            self.baseCallsDG = BaseCallsDataGroup(self._h5f, '/PulseData/BaseCalls')
        elif readType == 'CCS':
            self.baseCallsDG = CCSBaseCallsDataGroup(self._h5f, '/PulseData/ConsensusBaseCalls')
            self.rbaseCallsDG = BaseCallsDataGroup(self._h5f, '/PulseData/BaseCalls')

    def __del__(self):
        # __del__ also runs when __init__ failed before `_h5f` was assigned
        # (e.g. the file could not be opened); there is nothing to close then.
        h5f = getattr(self, '_h5f', None)
        if h5f is not None:
            h5f.close()

    def getZMWs(self):
        """Yield every hole number listed by the primary base-call group."""
        for hn in self.baseCallsDG.holeNumber:
            yield hn

    def getSequencingZMWs(self):
        """Yield hole numbers whose status is 'SEQUENCING' and whose base-call length is non-zero."""
        for hn in self.getZMWs():
            if self.baseCallsDG.getStatusStringForZMW(hn) == 'SEQUENCING' and self.baseCallsDG.getBaseCallLenForZMW(hn):
                yield hn
class BaseDataset(TestCase):

    """
    data is a 3-dimensional dataset with dimensions [z, y, x]

    The z dimension is labeled. It does not have any attached scales.
    The y dimension is not labeled. It has one attached scale.
    The x dimension is labeled. It has two attached scales.

    data2 is a 3-dimensional dataset with no associated dimension scales.
    """

    def setUp(self):
        self.f = File(self.mktemp(), 'w')
        for plain in ('data', 'data2'):
            self.f[plain] = np.ones((4, 3, 2), 'f')

        # (dataset name, length, extra set_scale arguments, axis of 'data')
        scale_specs = (
            ('x1', 2, (), 2),
            ('x2', 2, (b'x2 name',), 2),
            ('y1', 3, (b'y1 name',), 1),
        )
        for name, length, scale_args, axis in scale_specs:
            self.f[name] = np.ones((length), 'f')
            h5py.h5ds.set_scale(self.f[name].id, *scale_args)
            h5py.h5ds.attach_scale(self.f['data'].id, self.f[name].id, axis)

        # 'z1' is created but deliberately never attached to 'data'.
        self.f['z1'] = np.ones((4), 'f')

        for axis, label in ((0, b'z'), (2, b'x')):
            h5py.h5ds.set_label(self.f['data'].id, axis, label)

    def tearDown(self):
        if not self.f:
            return
        self.f.close()
def test_mode(self):
    """ Retrieved File objects have a meaningful mode attribute """
    root = File(self.mktemp(),'w')
    try:
        child = root.create_group('foo')
        mode_via_group = child.file.mode
        self.assertEqual(mode_via_group, root.mode)
    finally:
        root.close()
def test_create_exclusive(self):
    """ Mode 'w-' opens file in exclusive mode """
    fname = self.mktemp()
    fid = File(fname, 'w-')
    try:
        self.assertTrue(fid)
    finally:
        # Close even when the assertion fails so the handle is not leaked.
        fid.close()
    # The file exists now, so a second exclusive create must be refused.
    with self.assertRaises(IOError):
        File(fname, 'w-')
def hasVersion(filename):
    """Return True if the HDF5 file has a 'sassena_version' root attribute.

    The file is opened read-only and is closed again before returning,
    including when reading the attributes raises.
    """
    from h5py import File
    with File(filename, 'r') as f:
        return 'sassena_version' in f.attrs
def test_iter_zero(self):
    """ Iteration works properly for the case with no group members """
    empty_file = File(self.mktemp(), 'w')
    try:
        members = list(empty_file)
        self.assertEqual(members, [])
    finally:
        empty_file.close()
def get_dataset(h5file: File, path: DatasetPath) -> Dataset:
    """Search ``h5file`` for the dataset described by ``path``.

    The file is walked with ``visititems``; the visitor is chosen by the
    concrete type of ``path``. Returns whatever ``visititems`` returns, or
    None when ``path`` is of neither supported type.
    """
    if isinstance(path, DatasetPathContains):
        visitor = partial(_v_item, path.path)
        return h5file.visititems(visitor)
    if isinstance(path, DatasetPathWithAttribute):
        visitor = partial(_v_attrs, path.attribute, path.value)
        return h5file.visititems(visitor)
    return None
def test_core(self):
    """ Core driver is supported (no backing store) """
    fname = self.mktemp()
    fid = File(fname, 'w', driver='core', backing_store=False)
    try:
        self.assertTrue(fid)
        self.assertEqual(fid.driver, 'core')
    finally:
        # Close even when an assertion fails so the handle is not leaked.
        fid.close()
    # Without a backing store nothing may have been written to disk.
    self.assertFalse(os.path.exists(fname))
def ReadFiniteRadiusWaveform(n, filename, WaveformName, ChMass, InitialAdmEnergy, YLMRegex, LModes, DataType, Ws) :
    """
    This is just a worker function defined for ReadFiniteRadiusData,
    below, reading a single waveform from an h5 file of many
    waveforms.  You probably don't need to call this directly.

    The waveform group ``WaveformName`` of ``filename`` is read, its time
    axis is corrected and rescaled by ``ChMass``, and the result is stored
    in place in ``Ws[n]``. Only datasets whose name matches ``YLMRegex``
    with an 'L' group contained in ``LModes`` are loaded.

    Returns the areal radii divided by ``ChMass``.

    Raises IOError if the file cannot be opened (after printing a message)
    and ValueError if ``DataType`` is not one of GWFrames.h, GWFrames.hdot
    or GWFrames.Psi4.

    """
    try :
        # `cumtrapz` was renamed to `cumulative_trapezoid` and later removed
        # from SciPy; prefer the new name and fall back for old releases.
        from scipy.integrate import cumulative_trapezoid as integrate
    except ImportError :
        from scipy.integrate import cumtrapz as integrate
    from numpy import empty, sqrt, log, array
    from h5py import File
    import GWFrames
    try :
        f = File(filename, 'r')
    except IOError :
        print("ReadFiniteRadiusWaveform could not open the file '{0}'".format(filename))
        raise
    try :
        W = f[WaveformName]
        T = W['AverageLapse.dat'][:,0]
        # Keep only the samples selected by MonotonicIndices for the time column
        Indices = MonotonicIndices(T)
        T = T[Indices]
        Radii = array(W['ArealRadius.dat'])[Indices,1]
        AverageLapse = array(W['AverageLapse.dat'])[Indices,1]
        CoordRadius = W['CoordRadius.dat'][0,1]
        YLMdata = [DataSet for DataSet in list(W) for m in [YLMRegex.search(DataSet)] if (m and int(m.group('L')) in LModes)]
        YLMdata = sorted(YLMdata, key=lambda DataSet : [int(YLMRegex.search(DataSet).group('L')), int(YLMRegex.search(DataSet).group('M'))])
        LM = sorted([[int(m.group('L')), int(m.group('M'))] for DataSet in YLMdata for m in [YLMRegex.search(DataSet)] if m])
        NModes = len(LM)
        # Lapse is given by 1/sqrt(-g^{00}), where g is the full 4-metric
        T[1:] = integrate(AverageLapse/sqrt(((-2.0*InitialAdmEnergy)/Radii) + 1.0), T) + T[0]
        T -= (Radii + (2.0*InitialAdmEnergy)*log((Radii/(2.0*InitialAdmEnergy))-1.0))
        Ws[n].SetTime(T/ChMass)
        # WRONG!!!: # Radii /= ChMass
        NTimes = Ws[n].NTimes()
        # Ws[n].SetFrame is not done, because we assume the inertial frame
        Ws[n].SetFrameType(GWFrames.Inertial) # Assumption! (but this should be safe)
        Ws[n].SetDataType(DataType)
        Ws[n].SetRIsScaledOut(True) # Assumption! (but it should be safe)
        Ws[n].SetMIsScaledOut(True) # We have made this true
        Ws[n].SetLM(LM)
        Data = empty((NModes, NTimes), dtype='complex')
        if(DataType == GWFrames.h) :
            UnitScaleFactor = 1.0 / ChMass
        elif(DataType == GWFrames.hdot) :
            UnitScaleFactor = 1.0
        elif(DataType == GWFrames.Psi4) :
            UnitScaleFactor = ChMass
        else :
            raise ValueError('DataType "{0}" is unknown.'.format(DataType))
        RadiusRatio = Radii / CoordRadius
        for m,DataSet in enumerate(YLMdata) :
            modedata = array(W[DataSet])
            # Columns 1 and 2 are combined as real and imaginary parts
            Data[m,:] = (modedata[Indices,1] + 1j*modedata[Indices,2]) * RadiusRatio * UnitScaleFactor
        Ws[n].SetData(Data)
    finally :
        f.close()
    return Radii/ChMass
def test_filename(self):
    """ .filename behaves properly for string data """
    path = self.mktemp()
    handle = File(path, 'w')
    try:
        reported = handle.filename
        self.assertEqual(reported, path)
        self.assertIsInstance(handle.filename, six.text_type)
    finally:
        handle.close()
def test_close_multiple_mpio_driver(self):
    """ MPIO driver and options """
    from mpi4py import MPI

    path = self.mktemp()
    handle = File(path, 'w', driver='mpio', comm=MPI.COMM_WORLD)
    handle.create_group("test")
    # Closing the same file twice in a row is the scenario under test.
    for _ in range(2):
        handle.close()
def test_property(self):
    """ File object can be retrieved from subgroup """
    path = self.mktemp()
    root = File(path, 'w')
    try:
        via_group = root['/'].file
        self.assertEqual(root, via_group)
    finally:
        root.close()
def h5_writer(data, h5_path):
    """Write ``data`` to a fresh HDF5 file at ``h5_path``.

    Any existing file at ``h5_path`` is deleted first. The data is stored
    in a single dataset named 'default', created with gzip compression,
    automatic chunking and the shuffle filter enabled.
    """
    from h5py import File
    # `remove` was used without being imported in this function; import it
    # next to `exists` so the function does not depend on module globals.
    from os import remove
    from os.path import exists

    if exists(h5_path):
        remove(h5_path)
    # The context manager closes the file even if create_dataset raises.
    with File(h5_path, 'w') as f:
        f.create_dataset('default', data=data, compression='gzip', chunks=True, shuffle=True)
def weightsToHDF(w, name):
    """Store the weight arrays in ``w`` in the HDF5 file ``name + ".h5"``.

    A group "Weights" is created; every element of ``w`` except the last
    is written as "Hidden 1", "Hidden 2", ... in order, and the last
    element is written as "Output". ``w`` must not be empty.
    """
    # The context manager closes the file even when a write fails.
    with File(name+".h5","w") as f:
        weights = f.create_group("Weights")
        for number, layer in enumerate(w[:-1], 1):
            weights.create_dataset("Hidden "+str(number), data=layer)
        weights.create_dataset("Output", data=w[-1])
def test_unicode(self):
    """ Unicode filenames can be used, and retrieved properly via .filename
    """
    path = self.mktemp(prefix = six.unichr(0x201a))
    handle = File(path, 'w')
    try:
        reported = handle.filename
        self.assertEqual(reported, path)
        self.assertIsInstance(handle.filename, six.text_type)
    finally:
        handle.close()
def writeData(data, outputFilename):
    """
    Writes data to a tiff, hdf5, or npy file.

    Parameters
    ----------
    data : 3D numpy array
        The data to be written. Must have 3 dimensions, i.e. data.ndim == 3
    outputFilename : string
        The absolute or relative location of the particular file to be read
        in. outputFilename must end in one of the following extensions
        ['.tif', '.tiff', '.hdf5', '.h5', '.npy'].

    Raises
    ------
    AssertionError
        If data is not 3D or the extension is not supported.
    IOError
        If the target directory does not exist or the file cannot be
        created or written.

    Notes
    -----
    - Data to be saved must be a 3D array.
    - npy and tiff output is converted to float32; the hdf5 dataset "/raw"
      is created with dtype float32, a single chunk covering the whole
      array and gzip compression level 1.

    """
    assert data.ndim==3, "Can only write out 3D hdf5, tiff, and numpy files"
    filename = outputFilename.rstrip('/')
    basePath, fName = os.path.split(filename)
    name, ext = os.path.splitext(fName)
    ext = ext.lower()
    if basePath and not os.path.exists(basePath):
        raise IOError("Directory does not exist: %s" % (basePath))

    if ext == '.npy':
        try:
            np.save(filename, np.array(data,dtype=np.float32))
        except IOError:
            raise IOError("Error writing npy data to: \"%s\"" % filename)

    elif ext in ['.h5', '.hdf5']:
        from h5py import File
        try:
            h5File = File(filename, "w")
        except IOError:
            raise IOError("Error creating writable hdf5 file at: \"%s\"" % filename)
        try:
            shp = data.shape
            comp="gzip"
            compOpts=1
            h5File.create_dataset("/raw", shp, np.float32, data, chunks=shp, compression=comp, compression_opts=compOpts)
        finally:
            # The file was previously never closed, which leaks the handle
            # and leaves flushing to garbage collection.
            h5File.close()

    elif ext in ['.tif', '.tiff']:
        from libtiff import TIFF
        try:
            tiff = TIFF.open(filename, 'w')
            try:
                tiff.write_image(np.array(data,dtype=np.float32))
            finally:
                tiff.close()
        except IOError:
            raise IOError("Error writing tif file at: \"%s\"" % filename)

    else:
        assert False, "Can only write out 3D hdf5, tiff, and numpy files"
def hdf5_writer(filename, data, components=None):
    """
    Write a dataset or a subset to an HDF5 file.

    Parameters
    ----------
    filename : str
        Path of the HDF5 file to create (opened in 'w' mode).
    data : `~glue.core.data.Data` or `~glue.core.subset.Subset`
        The data or subset to export
    components : `list` or `None`
        The components to export. Set this to `None` to export all
        components.

    Notes
    -----
    For a subset, 1D components are reduced to the selected elements.
    Components with more dimensions keep their shape and the unselected
    elements are replaced by NaN (floats), 0 (integers) or '' (byte
    strings); components of any other dtype are skipped with a warning.
    """

    if isinstance(data, Subset):
        mask = data.to_mask()
        data = data.data
    else:
        mask = None

    from h5py import File

    # The context manager closes the file even if a component fails to write.
    with File(filename, 'w') as f:

        for cid in data.visible_components:

            if components is not None and cid not in components:
                continue

            comp = data.get_component(cid)
            if comp.categorical:
                if comp.labels.dtype.kind == 'U':
                    # Unicode labels are stored as ASCII byte strings
                    values = np.char.encode(comp.labels, encoding='ascii', errors='replace')
                else:
                    values = comp.labels.copy()
            else:
                values = comp.data.copy()

            if mask is not None:
                if values.ndim == 1:
                    values = values[mask]
                else:
                    kind = values.dtype.kind
                    if kind == 'f':
                        values[~mask] = np.nan
                    elif kind == 'i':
                        values[~mask] = 0
                    elif kind == 'S':
                        values[~mask] = ''
                    else:
                        warnings.warn("Unknown data type in HDF5 export: {0}".format(values.dtype))
                        continue

            f.create_dataset(cid.label, data=values)
def test_mode_external(self):
    """ Mode property works for files opened via external links

    Issue 190.
    """
    fname1 = self.mktemp()
    fname2 = self.mktemp()

    # The link target only has to exist; create it empty.
    f1 = File(fname1,'w')
    f1.close()

    f2 = File(fname2,'w')
    # Pre-bind f3 so the cleanup cannot fail with a NameError (hiding the
    # real error) when the link cannot be created or followed.
    f3 = None
    try:
        f2['External'] = h5py.ExternalLink(fname1, '/')
        f3 = f2['External'].file
        self.assertEqual(f3.mode, 'r+')
    finally:
        f2.close()
        if f3 is not None:
            f3.close()

    f2 = File(fname2,'r')
    f3 = None
    try:
        f3 = f2['External'].file
        self.assertEqual(f3.mode, 'r')
    finally:
        f2.close()
        if f3 is not None:
            f3.close()
def test_readwrite(self):
    """ Mode 'r+' opens existing file in readwrite mode """
    fname = self.mktemp()
    fid = File(fname, 'w')
    try:
        fid.create_group('foo')
    finally:
        fid.close()

    fid = File(fname, 'r+')
    try:
        # assertIn instead of a bare assert: it still runs under `python -O`
        # and reports the container on failure.
        self.assertIn('foo', fid)
        fid.create_group('bar')
        self.assertIn('bar', fid)
    finally:
        # Close even when an assertion fails so the handle is not leaked.
        fid.close()
def test_write_only(self):
    """ User block only allowed for write """
    path = self.mktemp()
    # Create the file, then release it again straight away.
    File(path, 'w').close()

    for mode in ('r', 'r+'):
        with self.assertRaises(ValueError):
            f = h5py.File(path, mode, userblock_size=512)
def test_close(self):
    """ All retrieved File objects are closed at the same time """
    path = self.mktemp()
    root = File(path, 'w')
    via_group = root.create_group('foo').file
    via_root = root['/'].file
    # Closing one handle must invalidate all of them.
    via_group.close()
    for handle in (root, via_group, via_root):
        self.assertFalse(handle)
def test_readonly(self):
    """ Core driver can be used to open existing files """
    fname = self.mktemp()
    fid = File(fname, 'w')
    try:
        fid.create_group('foo')
    finally:
        fid.close()

    fid = File(fname, 'r', driver='core')
    try:
        self.assertTrue(fid)
        # assertIn instead of a bare assert: it still runs under `python -O`.
        self.assertIn('foo', fid)
        with self.assertRaises(ValueError):
            fid.create_group('bar')
    finally:
        # Close even when an assertion fails so the handle is not leaked.
        fid.close()
def test_issue_212(self):
    """ Issue 212

    Fails with:

    AttributeError: 'SharedConfig' object has no attribute 'lapl'
    """
    def make_closer(handle):
        # Build a cleanup callback that closes `handle` if it is still
        # open and ignores an IOError raised while doing so.
        def close_quietly():
            try:
                if handle:
                    handle.close()
            except IOError:
                pass
        return close_quietly

    target_path = self.mktemp()
    link_path = self.mktemp()

    target = File(target_path, 'w')
    self.addCleanup(make_closer(target))
    target.create_group('a')
    target.close()

    linker = File(link_path, 'w')
    self.addCleanup(make_closer(linker))
    linker['link'] = ExternalLink(target_path, '/')  # note root group
    linker.close()

    reader = File(link_path, 'r')
    self.addCleanup(make_closer(reader))
    self.assertIsInstance(reader['link']['a'], Group)
def orderByQmodulus(filename,outfile=None):
    """ Sassena does not enforce any ordering of the structure factors.
    Here we order by increasing value of modulus of Q-vectors.

    The datasets 'qvectors', 'fqt', 'fq', 'fq0' and 'fq2' (those present in
    ``filename``) are reordered along their first axis and written, together
    with the root attributes, to ``outfile``. When ``outfile`` is not given,
    a temporary file is written and then moved over ``filename``.

    Returns None.
    """
    from h5py import File
    import numpy
    import shutil

    overwrite=False
    if not outfile:
        # NOTE(review): `tempfile` is called as a function, so it is
        # presumably a helper defined elsewhere in this module rather than
        # the standard library module - confirm.
        outfile=tempfile() # temporary output file
        overwrite=True

    # Both files are closed on exit, also when reading or writing fails.
    with File(filename,'r') as f, File(outfile,'w') as g:
        ds_q = numpy.array(f["qvectors"]) # shape==(nvectors,3)
        moduli=numpy.square(ds_q).sum(axis=1) # moduli-squared of the Q-vectors
        rank=numpy.argsort(moduli) # rank from smallest to greatest
        for dset in ('qvectors', 'fqt', 'fq', 'fq0', 'fq2'):
            if dset in f:
                g[dset]=numpy.array(f[dset])[rank]
        for key,val in f.attrs.items():
            g.attrs[key]=val

    if overwrite:
        # shutil.move copes with spaces in the paths and raises on failure,
        # unlike the unquoted shell `mv` used before.
        shutil.move(outfile, filename)
    return None
def get_phase(args):
    """Compute and save the wrapped phase of one measurement file.

    ``args`` is a sequence (filename, path, path_raw, path_images, mask,
    coord) so that the function can be mapped over a list of work items.
    ``coord`` holds four indices; the frame is cropped to
    [coord[0]-1 : coord[1]+1, coord[2]-1 : coord[3]+1] before processing.

    The phase is written as an image below ``path_images`` and as raw
    binary below ``path_raw``; both names are built from 'wrapped',
    characters 4..10 of ``filename`` and the extension.

    Returns a CSV line "filename,average intensity,average modulation\n"
    (averages taken over the masked-in pixels), or None when the input
    file cannot be opened.
    """
    filename = args[0]
    path = args[1]
    path_raw = args[2]
    path_images = args[3]
    mask = args[4]
    coord = args[5]

    file_in = os.path.join(path,filename)
    image_phase = os.path.join(path_images,'wrapped'+filename[4:11]+'bmp')
    binary_phase = os.path.join(path_raw,'wrapped'+filename[4:11]+'dat')

    # Open meas file and grab dataset
    try:
        f = File(file_in, 'r')
    except Exception:
        # Unreadable files are skipped on purpose; unlike a bare `except`
        # this still lets KeyboardInterrupt/SystemExit through.
        print('Corrupt h5 file: '+filename+' ignoring')
        return None
    try:
        sub = f.get(r'measurement0/frames/frame_full/data')
        data = np.array(sub[coord[0]-1:coord[1]+1,coord[2]-1:coord[3]+1],'f')
    finally:
        # Release the file even if the dataset is missing or the crop fails.
        f.close()

    # Get phase
    phase, modulation, intensity = calc_phase(data)

    # Apply mask
    phase[~mask] = 0
    intensity[~mask] = 0
    modulation[~mask] = 0

    # Save phase
    toimage(phase).save(image_phase)
    phase.tofile(binary_phase)

    ave_mod = np.average(modulation[mask])
    ave_int = np.average(intensity[mask])
    return "%s,%f,%f\n" % (filename, ave_int, ave_mod)
def setUp(self):
    """Open two independent scratch files in write mode as ``f1`` and ``f2``."""
    for attr in ('f1', 'f2'):
        setattr(self, attr, File(self.mktemp(), 'w'))
def _get_random_args(aug_dict, shape, noise_load_dict=None, noise_on_channel=None): """ :param aug_dict: :param shape: :param noise_load_dict: dict( filepath='/path/to/noise_file', size=number_of_elements ) :return: """ def _load_noise_from_data(): # Randomly select chunk position pos = int( np.random.uniform(0, noise_load_dict['size'] - np.prod(shape))) # Load the data noise = noise_load_dict['data'][pos:pos + np.prod(shape)] # Reshape to match the images noise = np.reshape(noise, shape) # Get the proper standard variation var = np.random.uniform(0, aug_dict['noise_var_range']) noise *= (var**0.5) return noise # FIXME this is still the major bottleneck if aug_dict['noise_var_range'] > 0: if noise_load_dict is not None: if 'data' not in noise_load_dict or noise_load_dict['data'] is None: print('Trying to load some noise ...') if os.path.exists(noise_load_dict['filepath']): with File(noise_load_dict['filepath'], mode='r') as f: noise_load_dict['data'] = f['data'][:] else: print( 'Noise file does not exist, creating it now ... This may take a while ...' 
) print('Generating a lot of noise ...') noise_load_dict['data'] = np.random.normal( 0, 1, (noise_load_dict['size'], )) print('Make some noise!!!') with File(noise_load_dict['filepath'], mode='w') as f: f.create_dataset('data', data=noise_load_dict['data']) noise = _load_noise_from_data() else: var = np.random.uniform(0, aug_dict['noise_var_range']) # noise = np.random.normal(0, var ** 0.5, shape) if noise_on_channel is None: im = np.zeros((np.prod(shape), )) noise = cv2.randn(im, 0, var**0.5) noise = (np.reshape(noise, shape) * 127 + 128).astype('uint8') else: noise = np.ones(shape, dtype='uint8') * 128 for ch in noise_on_channel: n_im = np.zeros((int(np.prod(shape) / shape[3]), )) n_im = cv2.randn(n_im, 0, var**0.5) n_im = np.reshape(n_im, shape[:3]) noise[..., ch] = (n_im * 127 + 128).astype('uint8') else: noise = None # print('Noise.shape = {}'.format(noise.shape)) random_smoothing = [0, 0, 0] random_smoothing[0] = np.random.uniform(0, 1) * np.pi if aug_dict['random_smooth_range'][0] > 0: random_smoothing[1] = np.random.uniform( 0, aug_dict['random_smooth_range'][0]) if aug_dict['random_smooth_range'][1] > 0: random_smoothing[2] = np.random.uniform( 0, aug_dict['random_smooth_range'][1]) displace_slices = [[], []] if aug_dict['displace_slices_range'] > 0: displace_slices[0] = [ np.random.uniform(-aug_dict['displace_slices_range'], aug_dict['displace_slices_range']) for idx in range(shape[2]) ] displace_slices[1] = [ np.random.uniform(-aug_dict['displace_slices_range'], aug_dict['displace_slices_range']) for idx in range(shape[2]) ] brightness = 0 if aug_dict['brightness_range'] > 0: brightness = np.random.uniform(-aug_dict['brightness_range'], aug_dict['brightness_range']) contrast = 0 if aug_dict['contrast_range']: if type(aug_dict['contrast_range']) == tuple: contrast = np.random.uniform(aug_dict['contrast_range'][0], aug_dict['contrast_range'][1]) elif type(aug_dict['contrast_range']) == float: divide = np.random.random() < 0.5 contrast = 
np.random.uniform(1, aug_dict['contrast_range']) if divide: contrast = 1 / contrast elif type(aug_dict['contrast_range']) == dict: ctr_settings = aug_dict['contrast_range'] divide = np.random.random() < ctr_settings['increase_ratio'] if divide: contrast = np.random.uniform(1, ctr_settings['increase']) else: contrast = 1 / np.random.uniform(1, ctr_settings['decrease']) else: raise NotImplementedError rotation = 0 if aug_dict['rotation_range']: rotation = np.deg2rad( np.random.uniform(-aug_dict['rotation_range'], aug_dict['rotation_range'])) shear = 0 if aug_dict['shear_range'] > 0: shear = np.deg2rad( np.random.uniform(-aug_dict['shear_range'], aug_dict['shear_range'])) zoom = [1, 1] if aug_dict['zoom_range'][0] != 1 and aug_dict['zoom_range'][1] != 1: zoom = list( np.random.uniform(aug_dict['zoom_range'][0], aug_dict['zoom_range'][1], 2)) horizontal_flip = False if aug_dict['horizontal_flip']: horizontal_flip = np.random.random() < 0.5 vertical_flip = False if aug_dict['vertical_flip']: vertical_flip = np.random.random() < 0.5 depth_flip = False if aug_dict['depth_flip']: depth_flip = np.random.random() < 0.5 transpose = None if aug_dict['transpose']: transpose = list(range( 0, len(shape) - 1)) # The last dim is the channel dim which should not be touched np.random.shuffle(transpose) return dict(noise=noise, random_smooth=random_smoothing, displace_slices=displace_slices, rotation=rotation, shear=shear, zoom=zoom, horizontal_flip=horizontal_flip, vertical_flip=vertical_flip, depth_flip=depth_flip, transpose=transpose, brightness=brightness, contrast=contrast)
def _validate_demultiplexed(qclient, job_id, prep_info, files, out_dir): """Validate and fix a new 'Demultiplexed' artifact Parameters ---------- qclient : qiita_client.QiitaClient The Qiita server client job_id : str The job id prep_info : dict of {str: dict of {str: str}} The prep information keyed by sample id files : dict of {str: list of str} The files to add to the new artifact, keyed by filepath type out_dir : str The output directory Returns ------- dict The results of the job """ qclient.update_job_step(job_id, "Step 2: Validating 'Demultiplexed' files") supported_fp_types = { 'preprocessed_fasta', 'preprocessed_fastq', 'preprocessed_demux', 'log' } unsupported_fp_types = set(files) - supported_fp_types if unsupported_fp_types: error_msg = ("Filepath type(s) %s not supported by artifact type " "Demultiplexed. Supported filepath types: %s" % (', '.join(unsupported_fp_types), ', '.join( sorted(supported_fp_types)))) return False, None, error_msg # At most one file of each type can be provided offending = set(fp_t for fp_t, fps in files.items() if len(fps) > 1) if offending: errors = [ "%s (%d): %s" % (fp_t, len(files[fp_t]), ', '.join(files[fp_t])) for fp_t in sorted(offending) ] error_msg = ("Only one filepath of each file type is supported, " "offending types:\n%s" % "; ".join(errors)) return False, None, error_msg # Check which files we have available: fasta = (files['preprocessed_fasta'][0] if 'preprocessed_fasta' in files else None) fastq = (files['preprocessed_fastq'][0] if 'preprocessed_fastq' in files else None) demux = (files['preprocessed_demux'][0] if 'preprocessed_demux' in files else None) log = (files['log'][0] if 'log' in files else None) if demux: # If demux is available, use that one to perform the validation and # generate the fasta and fastq from it success, a_info, error_msg = _validate_demux_file(qclient, job_id, prep_info, out_dir, demux, log_fp=log) elif fastq: # Generate the demux file from the fastq demux = join(out_dir, "%s.demux" % 
splitext(basename(fastq))[0]) with File(demux, 'w') as f: # to_hdf5 expects a list to_hdf5([fastq], f) # Validate the demux, providing the original fastq success, a_info, error_msg = _validate_demux_file(qclient, job_id, prep_info, out_dir, demux, fastq_fp=fastq, log_fp=log) elif fasta: # Generate the demux file from the fasta demux = join(out_dir, "%s.demux" % splitext(basename(fasta))[0]) with File(demux, 'w') as f: # to_hdf5 expects a list to_hdf5([fasta], f) # Validate the demux, providing the original fasta success, a_info, error_msg = _validate_demux_file(qclient, job_id, prep_info, out_dir, demux, fasta_fp=fasta, log_fp=log) else: error_msg = ("Either a 'preprocessed_demux', 'preprocessed_fastq' or " "'preprocessed_fasta' file should be provided.") return False, None, error_msg return success, a_info, error_msg
def save_model_state_dict(hf: h5py.File, state_dict: Dict[str, ModelParameter]) -> None:
    """Write every parameter of ``state_dict`` into a new group of ``hf``.

    The group is named ``MODEL_STATE_DICT_GROUP`` and created with
    ``track_order=True``. Each parameter becomes one dataset named after
    its public name; the parameter's private name is stored on that
    dataset under the attribute ``STATE_DICT_KEY_ATTR``.
    """
    group = hf.create_group(MODEL_STATE_DICT_GROUP, track_order=True)
    for public_name in state_dict:
        param = state_dict[public_name]
        values = param.tensor.numpy()
        stored = group.create_dataset(public_name, data=values)
        stored.attrs[STATE_DICT_KEY_ATTR] = param.private_name
def makeils4(band,inputslit,resolving_power=0,transfer=0,ils_grid=[]):
    '''
    Generate the full instrument line shape (ILS) at the center of a band
    by convolving the slit function with the point ILS, the optional
    spectrograph image quality term and the pixel function.

    #VERSION 2 makes this a function and allows for pointils to be
    #fabricated outside of this function
    #VERSION 3 makes slit be defined outside and allows Slit Homogenizer
    #transfer function to be applied
    #VERSION 4 uses SH transfer functions made using replanned optical
    #design and pointils2.pro
    #
    #this still assumes center of band only

    #INPUTS:
    #   band:  GeoCarb band identification, following:
    #          0 = O2A Band (0.765 microns)
    #          1 = WCO2 Band (1.606 microns)
    #          2 = SCO2 Band (2.06 microns)
    #          3 = CH4/CO Band (2.32 microns)
    #
    #   inputslit: slit illumination sampled uniformly across the slit
    #          width (defined on a 0 to 1 scale across the slit)
    #
    #   resolving_power: if > 0, an additional image quality term built
    #          from this value is convolved in; 0 skips that step
    #
    #   transfer: if true, the Slit Homogenizer transfer function stored in
    #          the HDF5 file `trf_fid` (datasets 'arrays', 'zin', 'zout')
    #          is applied to the input slit
    #
    #   ils_grid: optional wavelength offsets from the band center on which
    #          the ILS is returned; if empty, the internal wavelength grid
    #          is used
    #
    #RETURNS:
    #   (wavelength offsets from band center, ILS on that grid,
    #    linear position at the detector, slit function)
    '''
    deltawave = 1e-6
    #assume error budget is 35 microns
    slitwidth = 36. #microns
    #slitwidth = 27.45 #microns
    [sigma,alpha,beta0,order,fcam] = get_geocarb_gratinginfo(band)
    #find central wavelength
    cenwave = gratinglambda(sigma,alpha,beta0,m=order)
    wave=np.arange(-int(0.001*2/deltawave)/2-0.5,int(0.001*2/deltawave)/2+1)*deltawave + cenwave
    #compute beta angles for these wavelengths
    betas = betaangle(wave,sigma,alpha,m=order)
    pointils = pointils2(band,wave)
    #linear position at the detector
    dx = (betas-beta0)*dtor*fcam*1000.

    #ALLOW FOR SLIT FUNCTION TO BE DEFINED BEFOREHAND. THIS ALLOWS FOR
    #INHOMOGENEOUS SLIT ILLUMINATIONS
    gratingmag = np.cos(alpha*dtor)/np.cos(beta0*dtor)

    #INPUT SLIT FUNCTION HAS BEEN DEFINED ON 0 TO 1 SCALE for scale of -27
    #microns to +27 microns at cross-slit telescope focal plane
    #so need new dx that scales as 0 to 1 across imaged slit width
    inputx = slitwidth*(np.linspace(0,1,len(inputslit))-0.5)
    inputxsh = 1.5*inputx
    if transfer:
        # Open the transfer-function file once and close it again; it used
        # to be opened three times and never closed.
        with File(trf_fid, 'r') as trf:
            transferf = trf['arrays'][band,:,:]
            zin = trf['zin'][:]
            zout = trf['zout'][:]
        shinput = np.interp(zin,inputxsh,inputslit)
        shoutput = np.matmul(transferf,shinput.T)
        f_interp = interp1d(zout*gratingmag,shoutput,fill_value=0.,bounds_error=False)
        slit = f_interp(dx)
    else:
        #NOW MAKE SLIT BEFORE USE OF SLIT HOMOGENIZER in case /transfer not used
        baseslit = np.zeros(len(inputxsh))
        for ix,x in enumerate(inputxsh):
            if (x >= inputx.min())*(x <= inputx.max()):
                baseslit[ix] = np.interp(inputxsh[ix],inputx,inputslit)
        slit = np.interp(dx,inputxsh*gratingmag,baseslit)

    #NOW COMPUTE SPECTROGRAPH IMAGE QUALITY
    #FOR NOW ASSUME DIFFRACTION LIMIT
    if resolving_power > 0:
        # NOTE(review): only sqrt(log(2)) is squared in the denominator, not
        # the whole width term - confirm this is the intended profile.
        specim = np.exp(-dx**2/(cenwave/resolving_power/np.sqrt(np.log(2))**2))
        specim = specim/np.max(specim)

    #NOW GENERATE PIXEL FUNCTION
    #pixels are 18 microns
    pix = np.zeros(len(dx))
    inpix = np.where(abs(dx) <= 9.)[0]
    pix[inpix]=1.

    #NOW START THE CONVOLUTIONS
    #FIRST CONVOLVE SLIT WITH POINTILS
    ils0 = convolve1d(slit,pointils,mode='constant',cval=0.0)

    #NEXT CONVOLVE THIS WITH SPECTROGRAPH IMAGE QUALITY
    if resolving_power > 0:
        ils1 = convolve1d(ils0,specim,mode='constant',cval=0.0)
    else:
        ils1 = ils0[:]

    #NEXT CONVOLVE THIS WITH PIXEL FUNCTION
    ils2 = convolve1d(ils1,pix,mode='constant',cval=0.0)

    out_wave = wave[:]
    if len(ils_grid) > 0:
        # asarray so that a plain list of offsets works as well as an array
        out_wave = cenwave+np.asarray(ils_grid)
    ils_g = np.interp(out_wave,wave,ils2)
    return out_wave-cenwave,ils_g,dx,slit
def save(self, file: H5File) -> None:
    """Save the grid state into an HDF5 file."""
    axis_names = ('x', 'xv', 'y', 'yv', 'z', 'zv')
    for axis in axis_names:
        # Each attribute becomes a dataset of the same name, which is then
        # turned into a scale of that name.
        values = getattr(self, axis)
        stored = file.create_dataset(axis, data=values)
        stored.make_scale(axis)
def setUp(self):
    """Create a scratch file with groups /test/a .. /test/d and keep /test as ``self.group``."""
    self.f = File(self.mktemp(), 'w')
    for leaf in 'abcd':
        self.f.create_group('/test/' + leaf)
    self.group = self.f['test']
class TestExternalLinks(TestCase):

    """
        Feature: Create and manage external links
    """

    def setUp(self):
        """Open the linking file and create a closed target file with group 'external'."""
        self.f = File(self.mktemp(), 'w')
        self.ename = self.mktemp()
        target = File(self.ename, 'w')
        self.ef = target
        target.create_group('external')
        target.close()

    def tearDown(self):
        # Closed handles are falsy, so only files still open are closed.
        for handle in (self.f, self.ef):
            if handle:
                handle.close()

    def test_epath(self):
        """ External link paths attributes """
        link = ExternalLink('foo.hdf5', '/foo')
        self.assertEqual(link.filename, 'foo.hdf5')
        self.assertEqual(link.path, '/foo')

    def test_erepr(self):
        """ External link repr """
        link = ExternalLink('foo.hdf5','/foo')
        text = repr(link)
        self.assertIsInstance(text, six.string_types)

    def test_create(self):
        """ Creating external links """
        self.f['ext'] = ExternalLink(self.ename, '/external')
        linked_group = self.f['ext']
        # Keep the target handle on self so tearDown closes it.
        self.ef = linked_group.file
        self.assertNotEqual(self.ef, self.f)
        self.assertEqual(linked_group.name, '/external')

    def test_exc(self):
        """ KeyError raised when attempting to open broken link """
        self.f['ext'] = ExternalLink(self.ename, '/missing')
        with self.assertRaises(KeyError):
            self.f['ext']

    # I would prefer IOError but there's no way to fix this as the exception
    # class is determined by HDF5.
    def test_exc_missingfile(self):
        """ KeyError raised when attempting to open missing file """
        self.f['ext'] = ExternalLink('mongoose.hdf5','/foo')
        with self.assertRaises(KeyError):
            self.f['ext']

    def test_close_file(self):
        """ Files opened by accessing external links can be closed

        Issue 189.
        """
        self.f['ext'] = ExternalLink(self.ename, '/')
        target_file = self.f['ext'].file
        target_file.close()
        self.assertFalse(target_file)

    @ut.skipIf(NO_FS_UNICODE, "No unicode filename support")
    def test_unicode_encode(self):
        """
        Check that external links encode unicode filenames properly
        Testing issue #732
        """
        target_path = os.path.join(mkdtemp(), u"α.hdf5")
        with File(target_path, "w") as target:
            target.create_group('external')
        self.f['ext'] = ExternalLink(target_path, '/external')

    @ut.skipIf(NO_FS_UNICODE, "No unicode filename support")
    def test_unicode_decode(self):
        """
        Check that external links decode unicode filenames properly
        Testing issue #732
        """
        target_path = os.path.join(mkdtemp(), u"α.hdf5")
        with File(target_path, "w") as target:
            target.create_group('external')
            target["external"].attrs["ext_attr"] = "test"
        self.f['ext'] = ExternalLink(target_path, '/external')
        self.assertEqual(self.f["ext"].attrs["ext_attr"], "test")

    def test_unicode_hdf5_path(self):
        """
        Check that external links handle unicode hdf5 paths properly
        Testing issue #333
        """
        target_path = os.path.join(mkdtemp(), "external.hdf5")
        with File(target_path, "w") as target:
            target.create_group(u'α')
            target[u"α"].attrs["ext_attr"] = "test"
        self.f['ext'] = ExternalLink(target_path, u'/α')
        self.assertEqual(self.f["ext"].attrs["ext_attr"], "test")
def setUp(self):
    """Prepare the scratch file and a closed companion file.

    The companion file (path kept in ``self.ename``) is created with a
    single ``external`` group and closed again straight away.
    """
    self.f = File(self.mktemp(), 'w')

    self.ename = self.mktemp()
    companion = File(self.ename, 'w')
    self.ef = companion
    companion.create_group('external')
    companion.close()
for pol in POLS: out = "%s%s%s" % (out_prefix, pol, out_suffix) if os.path.exists(out): if opts.delete: os.remove(out) else: raise RuntimeError("%s exists" % out) # get metadata logging.debug("getting metadata") gridnum, t = get_meta(obsid) logging.info("using centroid time %s", t.isot) # open beam file logging.debug("generate spline from beam file") df = File(opts.beam_path, "r") if opts.chan_str is not None: low_index, n_chan = coarse_range(df["chans"][...], opts.chan_str) weights = trap(n_chan) logging.info( "averaging channels %s Hz with weights %s", df["chans"][low_index:low_index + n_chan], weights, ) beams = get_avg_beam_spline(df, gridnum, low_index, n_chan, weights) else: low_index, weight1 = mhz_to_index_weight(df["chans"][...], opts.freq_mhz) weights = np.array((weight1, 1 - weight1)) logging.info( "averaging channels %s Hz with weights %s",
from h5py import File
from matplotlib.pyplot import *
from numpy import pi, average

# Read everything that is needed while the file is open, then release the
# handle before plotting.
with File('snapshots/snapshots_s1.h5', 'r') as fh:
    ze = fh['/tasks/ze bot'][0, :, :, 0]
    t = fh['/scales/sim_time'][:]

fig, ax = subplots()
# 'lower' is the documented spelling for putting index [0, 0] at the
# bottom-left; 'bottom' is not an accepted value in current matplotlib.
imshow(ze, origin='lower', cmap='Greys')
xticks([])
yticks([])
print(t)
savefig('ze.png', dpi=256, bbox_inches='tight')
def chang2nwb(blockpath, outpath=None, session_start_time=None,
              session_description=None, identifier=None, anin4=False,
              ecog_format='auto', external_subject=True, include_pitch=False,
              include_intensity=False, speakers=True, mic=False, mini=False,
              hilb=False, verbose=False, imaging_path=None,
              parse_transcript=False, include_cortical_surfaces=True,
              include_electrodes=True, include_ekg=True,
              subject_image_list=None, rest_period=None, load_warped=False,
              **kwargs):
    """Convert one recording block into an NWB file and write it to disk.

    Parameters
    ----------
    blockpath: str
    outpath: None | str
        if None, output = [blockpath].nwb
    session_start_time: datetime.datetime
        default: datetime(1900, 1, 1)
    session_description: str
        default: blockname
    identifier: str
        default: blockname
    anin4: False | str
        Whether or not to convert ANIN4. ANIN4 is used as an extra channel for
        things like button presses, and is usually unused. If a string is
        supplied, that is used as the name of the timeseries.
    ecog_format: str ({'auto'}, 'htk', 'mat', 'raw')
        Any other value raises ValueError.
    external_subject: bool (optional)
        True: (default) the subject is written to [subject_id].nwb next to the
        output file (unless that file already exists) and read back from there.
        This is useful if you have multiple sessions for a single subject.
        False: the subject is saved normally
    include_pitch: bool (optional)
        add pitch data. Default: False
    include_intensity: bool (optional)
        add intensity data. Default: False
    speakers: bool (optional)
        add the speaker stimulus channels. Default: True
    mic: bool (optional)
        default: False
    mini: only save data stub (first 2000 samples). Used for testing
    hilb: bool
        include Hilbert Transform data. Default: False
    verbose: bool (optional)
    imaging_path: str (optional)
        None: use IMAGING_PATH
        'local': use [basepath]/imaging
        else: use supplied string
    parse_transcript: str (optional)
        One of 'CV', 'singing', 'emphasis', 'MOCHA'; falsy to skip.
    include_cortical_surfaces: bool (optional)
    include_electrodes: bool (optional)
    include_ekg: bool (optional)
    subject_image_list: list (optional)
        List of paths of images to include
    rest_period: None | array-like
        (start, stop) of a rest epoch
    load_warped: bool (optional)
        forwarded to add_electrodes
    kwargs: dict
        passed to pynwb.NWBFile

    Returns
    -------
    None. The NWB file is written to ``outpath`` and read back once as a
    check.

    Raises
    ------
    ValueError
        if ``ecog_format`` is not recognized.
    """

    behav_module = None

    basepath, blockname = os.path.split(blockpath)
    subject_id = get_subject_id(blockname)
    if identifier is None:
        identifier = blockname

    if session_description is None:
        session_description = blockname

    if outpath is None:
        outpath = blockpath + '.nwb'
    out_base_path = os.path.split(outpath)[0]

    if session_start_time is None:
        session_start_time = datetime(1900, 1, 1).astimezone(timezone('UTC'))

    if imaging_path is None:
        subj_imaging_path = path.join(IMAGING_PATH, subject_id)
    elif imaging_path == 'local':
        subj_imaging_path = path.join(basepath, 'imaging')
    else:
        subj_imaging_path = os.path.join(imaging_path, subject_id)

    # file paths
    bad_time_file = path.join(blockpath, 'Artifacts', 'badTimeSegments.mat')
    ecog_path = path.join(blockpath, 'RawHTK')
    ecog400_path = path.join(blockpath, 'ecog400', 'ecog.mat')
    elec_metadata_file = path.join(subj_imaging_path, 'elecs',
                                   'TDT_elecs_all.mat')
    mesh_path = path.join(subj_imaging_path, 'Meshes')
    pial_files = glob.glob(path.join(mesh_path, '*pial.mat'))

    # Create the NWB file object
    nwbfile = NWBFile(session_description, identifier, session_start_time,
                      datetime.now().astimezone(), session_id=identifier,
                      institution='University of California, San Francisco',
                      lab='Chang Lab', **kwargs)

    nwbfile.add_electrode_column('bad', 'electrode identified as too noisy')

    bad_elecs_inds = get_bad_elecs(blockpath)

    if include_electrodes:
        add_electrodes(nwbfile, elec_metadata_file, bad_elecs_inds,
                       load_warped=load_warped)
    else:
        # No electrode metadata requested: register 256 placeholder
        # electrodes with unknown positions.
        device = nwbfile.create_device('256Grid')
        electrode_group = nwbfile.create_electrode_group(
            name='256Grid electrodes', description='auto_group',
            location='location', device=device)

        for elec_counter in range(256):
            bad = elec_counter in bad_elecs_inds
            nwbfile.add_electrode(id=elec_counter + 1, x=np.nan, y=np.nan,
                                  z=np.nan, imp=np.nan, location=' ',
                                  filtering='none', group=electrode_group,
                                  bad=bad)
    ecog_elecs = list(range(len(nwbfile.electrodes)))
    ecog_elecs_region = nwbfile.create_electrode_table_region(
        ecog_elecs, 'ECoG electrodes on brain')

    # Read electrophysiology data from HTK files and add them to NWB file
    if ecog_format == 'auto':
        ecog_rate, data, ecog_path = auto_ecog(blockpath, ecog_elecs,
                                               verbose=False)
    elif ecog_format == 'htk':
        if verbose:
            print('reading htk acquisition...', flush=True)
        ecog_rate, data = readhtks(ecog_path, ecog_elecs)
        data = data.squeeze()
        if verbose:
            print('done', flush=True)
    elif ecog_format == 'mat':
        with File(ecog400_path, 'r') as f:
            data = f['ecogDS']['data'][:, ecog_elecs]
            ecog_rate = f['ecogDS']['sampFreq'][:].ravel()[0]
        ecog_path = ecog400_path
    elif ecog_format == 'raw':
        ecog_path = os.path.join(tdt_data_path, subject_id, blockname,
                                 'raw.mat')
        ecog_rate, data = load_wavs(ecog_path)
    else:
        raise ValueError('unrecognized argument: ecog_format')

    ts_desc = "all Wav data"

    if mini:
        data = data[:2000]

    ecog_ts = ElectricalSeries(name='ElectricalSeries',
                               data=H5DataIO(data, compression='gzip'),
                               electrodes=ecog_elecs_region, rate=ecog_rate,
                               description=ts_desc, conversion=0.001)
    nwbfile.add_acquisition(ecog_ts)

    if include_ekg:
        ekg_elecs = find_ekg_elecs(elec_metadata_file)
        if len(ekg_elecs):
            add_ekg(nwbfile, ecog_path, ekg_elecs)

    if mic:
        # Add microphone recording from room
        fs, data = get_analog(blockpath, 1)
        nwbfile.add_acquisition(
            TimeSeries('microphone', data, 'audio unit', rate=fs,
                       description="audio recording from microphone in room"))
    if speakers:
        fs, data = get_analog(blockpath, 2)
        # Add audio stimulus 1
        nwbfile.add_stimulus(
            TimeSeries('speaker 1', data, 'NA', rate=fs,
                       description="audio stimulus 1"))

        # Add audio stimulus 2
        fs, data = get_analog(blockpath, 3)
        if fs is not None:
            nwbfile.add_stimulus(
                TimeSeries('speaker 2', data, 'NA', rate=fs,
                           description='the second stimulus source'))

    if anin4:
        fs, data = get_analog(blockpath, 4)
        nwbfile.add_acquisition(
            TimeSeries(anin4, data, 'aux unit', rate=fs,
                       description="aux analog recording"))

    # Add bad time segments
    if os.path.exists(bad_time_file) and os.stat(bad_time_file).st_size:
        bad_time = sio.loadmat(bad_time_file)['badTimeSegments']
        for row in bad_time:
            nwbfile.add_invalid_time_interval(start_time=row[0],
                                              stop_time=row[1],
                                              tags=('ECoG artifact', ),
                                              timeseries=ecog_ts)

    if rest_period is not None:
        nwbfile.add_epoch_column(name='label', description='label')
        nwbfile.add_epoch(start_time=rest_period[0], stop_time=rest_period[1],
                          label='rest_period')

    if hilb:
        block_hilb_path = os.path.join(hilb_dir, subject_id, blockname,
                                       blockname + '_AA.h5')
        # This handle must stay open until the export below: the data is
        # streamed from it while the NWB file is being written.
        file = File(block_hilb_path, 'r')

        data = transpose_iter(
            file['X'])  # transposes data during iterative write
        filter_center = file['filter_center'][:]
        filter_sigma = file['filter_sigma'][:]

        data = H5DataIO(DataChunkIterator(tqdm(data,
                                               desc='writing hilbert data'),
                                          buffer_size=400 * 20),
                        compression='gzip')

        decomp_series = DecompositionSeries(
            name='LFPDecompositionSeries',
            description='Gaussian band Hilbert transform', data=data,
            rate=400., source_timeseries=ecog_ts, metric='amplitude')

        for band_mean, band_stdev in zip(filter_center, filter_sigma):
            decomp_series.add_band(band_mean=band_mean, band_stdev=band_stdev)

        hilb_mod = nwbfile.create_processing_module(
            name='ecephys', description='holds hilbert analysis results')
        hilb_mod.add_container(decomp_series)

    if include_cortical_surfaces:
        subject = ECoGSubject(subject_id=subject_id)
        subject.cortical_surfaces = create_cortical_surfaces(
            pial_files, subject_id)
    else:
        subject = Subject(subject_id=subject_id, species='Homo sapiens')

    if subject_image_list is not None:
        subject = add_images_to_subject(subject, subject_image_list)

    if external_subject:
        subj_fpath = path.join(out_base_path, subject_id + '.nwb')
        if not os.path.isfile(subj_fpath):
            subj_nwbfile = NWBFile(session_description=subject_id,
                                   identifier=subject_id, subject=subject,
                                   session_start_time=datetime(
                                       1900, 1,
                                       1).astimezone(timezone('UTC')))
            with NWBHDF5IO(subj_fpath, manager=manager, mode='w') as subj_io:
                subj_io.write(subj_nwbfile)
        # Stays open until after the export so the subject can be linked.
        subj_read_io = NWBHDF5IO(subj_fpath, manager=manager, mode='r')
        subj_nwbfile = subj_read_io.read()
        subject = subj_nwbfile.subject

    nwbfile.subject = subject

    if parse_transcript:
        if parse_transcript == 'CV':
            parseout = parse(blockpath, blockname)
            df = make_df(parseout, 0, subject_id, align_pos=1)
            nwbfile.add_trial_column('cv_transition_time',
                                     'time of CV transition in seconds')
            nwbfile.add_trial_column(
                'speak',
                'if True, subject is speaking. If False, subject is listening')
            nwbfile.add_trial_column('condition', 'syllable spoken')
            for _, row in df.iterrows():
                nwbfile.add_trial(start_time=row['start'],
                                  stop_time=row['stop'],
                                  cv_transition_time=row['align'],
                                  speak=row['mode'] == 'speak',
                                  condition=row['label'])
        elif parse_transcript == 'singing':
            parseout = parse(blockpath, blockname)
            df = make_df(parseout, 0, subject_id, align_pos=0)
            if not len(df):
                df = pd.DataFrame(parseout)
                df['mode'] = 'speak'

            df = df.loc[df['label'].astype('bool'), :]  # handle empty labels
            nwbfile.add_trial_column(
                'speak',
                'if True, subject is speaking. If False, subject is listening')
            nwbfile.add_trial_column('condition', 'syllable spoken')
            for _, row in df.iterrows():
                nwbfile.add_trial(start_time=row['start'],
                                  stop_time=row['stop'],
                                  speak=row['mode'] == 'speak',
                                  condition=row['label'])
        elif parse_transcript == 'emphasis':
            parseout = parse(blockpath, blockname)
            try:
                df = make_df(parseout, 0, subject_id, align_pos=0)
            except Exception:
                # Best-effort fallback to the raw parse output; narrowed from
                # a bare except so Ctrl-C / SystemExit still propagate.
                df = pd.DataFrame(parseout)
            if not len(df):
                df = pd.DataFrame(parseout)
            df = df.loc[df['label'].astype('bool'), :]  # handle empty labels
            nwbfile.add_trial_column('condition', 'word emphasized')
            nwbfile.add_trial_column(
                'speak',
                'if True, subject is speaking. If False, subject is listening')
            for _, row in df.iterrows():
                nwbfile.add_trial(start_time=row['start'],
                                  stop_time=row['stop'], speak=True,
                                  condition=row['label'])
        elif parse_transcript == 'MOCHA':
            nwbfile = create_transcription(nwbfile, transcript_path,
                                           blockname)

    # behavior
    if include_pitch:
        if behav_module is None:
            behav_module = nwbfile.create_processing_module(
                'behavior', 'processing about behavior')
        if os.path.isfile(
                os.path.join(blockpath, 'pitch_' + blockname + '.mat')):
            fs, data = load_pitch(blockpath)
            pitch_ts = TimeSeries(
                data=data, rate=fs, unit='Hz', name='pitch',
                description=
                'Pitch as extracted from Praat. NaNs mark unvoiced regions.')
            behav_module.add_container(
                BehavioralTimeSeries(name='pitch', time_series=pitch_ts))
        else:
            print('No pitch file for ' + blockname)

    if include_intensity:
        if behav_module is None:
            behav_module = nwbfile.create_processing_module(
                'behavior', 'processing about behavior')
        if os.path.isfile(
                os.path.join(blockpath, 'intensity_' + blockname + '.mat')):
            # NOTE(review): this checks for the intensity file but loads via
            # load_pitch -- confirm a dedicated intensity loader isn't meant.
            fs, data = load_pitch(blockpath)
            intensity_ts = TimeSeries(
                data=data, rate=fs, unit='dB', name='intensity',
                description='Intensity of speech in dB extracted from Praat.')
            behav_module.add_container(
                BehavioralTimeSeries(name='intensity',
                                     time_series=intensity_ts))
        else:
            print('No intensity file for ' + blockname)

    # Export the NWB file
    try:
        with NWBHDF5IO(outpath, manager=manager, mode='w') as io:
            io.write(nwbfile)
    finally:
        # Release the read handles even when the write fails.
        if external_subject:
            subj_read_io.close()

        if hilb:
            file.close()

    # read check
    with NWBHDF5IO(outpath, manager=manager, mode='r') as io:
        io.read()
class TestAdditionalMappingFuncs(BaseMapping):
    """
    Feature: Other dict methods (pop, pop_item, clear, update, setdefault)
    are available.
    """

    def setUp(self):
        """Create groups a-d under /test and keep /test as ``self.group``."""
        self.f = File(self.mktemp(), 'w')
        for group_path in ('/test/a','/test/b','/test/c','/test/d'):
            self.f.create_group(group_path)
        self.group = self.f['test']

    def tearDown(self):
        """Close the scratch file if it is still open."""
        if self.f:
            self.f.close()

    def test_pop_item(self):
        """.pop_item exists and removes item"""
        removed_key, _removed_val = self.group.popitem()
        self.assertNotIn(removed_key, self.group)

    def test_pop(self):
        """.pop exists and removes specified item"""
        self.group.pop('a')
        self.assertNotIn('a', self.group)

    def test_pop_default(self):
        """.pop falls back to default"""
        # 'e' was never created in setUp
        fallback = self.group.pop('e', None)
        self.assertEqual(fallback, None)

    def test_pop_raises(self):
        """.pop raises KeyError for non-existence"""
        # 'e' was never created in setUp
        with self.assertRaises(KeyError):
            self.group.pop('e')

    def test_clear(self):
        """.clear removes groups"""
        self.group.clear()
        self.assertEqual(len(self.group), 0)

    def test_update_dict(self):
        """.update works with dict"""
        additions = {'e': np.array([42])}
        self.group.update(additions)
        self.assertIn('e', self.group)

    def test_update_iter(self):
        """.update works with list"""
        additions = [
            ('e', np.array([42])),
            ('f', np.array([42])),
        ]
        self.group.update(additions)
        self.assertIn('e', self.group)

    def test_update_kwargs(self):
        """.update works with kwargs"""
        additions = {'e': np.array([42])}
        self.group.update(**additions)
        self.assertIn('e', self.group)

    def test_setdefault(self):
        """.setdefault gets group if it exists"""
        existing = self.group.setdefault('a')
        self.assertEqual(existing, self.group.get('a'))

    def test_setdefault_with_default(self):
        """.setdefault gets default if group doesn't exist"""
        # 'e' was never created in setUp
        # 42 used as groups should be strings
        result = self.group.setdefault('e', np.array([42]))
        self.assertEqual(result, 42)

    def test_setdefault_no_default(self):
        """
        .setdefault gets None if group doesn't exist, but as None isn't defined
        as data for a dataset, this should raise a TypeError.
        """
        # 'e' was never created in setUp
        with self.assertRaises(TypeError):
            self.group.setdefault('e')
def shapes(filename):
    """Count the ``bunches`` entries that are not a multiple of the period.

    Reads the ``Background_Period`` and ``bunches`` datasets from the HDF5
    file at ``filename`` and returns how many elements of ``bunches`` leave a
    non-zero remainder when divided by ``Background_Period``.
    """
    with File(filename, 'r') as h5:
        period = h5['Background_Period'][...]
        bunch_values = h5['bunches'][...]
    off_period = bunch_values % period != 0
    return off_period.sum()
def get_frame_rate(self):
    """Return the frame rate stored in the source file as a Python float.

    Reads ``emAnalysisOutput/eventOptions/framerate`` from the HDF5 file at
    ``self.from_path``.

    Raises
    ------
    ValueError
        if the dataset does not hold exactly one element.
    """
    with File(self.from_path, 'r') as f:
        # .item() pulls out the single stored element; calling float() on an
        # array with ndim > 0 is deprecated since NumPy 1.25.
        frame_rate = float(
            f['emAnalysisOutput/eventOptions/framerate'][:].item())
    return frame_rate
def create_theoretical_ils():
    """Build normalized ILS curves per band and optionally plot them.

    For each of the four bands ('nir', 'wco2', 'sco2', 'ch4') and each key in
    ``slit_keys``, ``makeils4`` is called once with ``transfer=0``
    ('no_homog') and once with ``transfer=1`` ('with_homog'). The returned
    ILS is normalized to unit area, and the last grid index whose cumulative
    integral is still <= 0.5 is stored as its 'offset'. When the module-level
    ``plot_ils`` flag is truthy, one three-panel figure per band is saved as
    ``slit_plot_band<band>.png`` and shown.

    Relies on the module-level names ``arp_fid``, ``plot_ils`` and
    ``makeils4``. As written, nothing is returned.
    """
    # read in the ILS spectral grid; the slice is copied into memory, so the
    # file can be closed right away
    with File(arp_fid,'r') as fid:
        dlam = fid['SpectralConversion/ils_delta_lambda'][:][:,0,0,:]

    slit_length = 1000 #subslit_alb.shape[-1]

    # Candidate slit illumination patterns
    inslit = {}
    inslit['uniform'] = np.ones(slit_length)
    inslit['point'] = np.zeros(slit_length)+0.01
    inslit['point'][int(slit_length/5)] = 1.
    inslit['Quarter'] = np.ones(slit_length)
    inslit['Quarter'][:int(0.25*slit_length)] = 0.
    inslit['Half'] = np.ones(slit_length)
    inslit['Half'][:int(slit_length/2)] = 0.
    inslit['ThreeQuarter'] = np.ones(slit_length)
    inslit['ThreeQuarter'][:int(0.75*slit_length)] = 0
    inslit['linear'] = np.linspace(0,1,slit_length)
    inslit['subslit'] = np.zeros(slit_length)
    inslit['subslit'][int(0.25*slit_length):int(0.5*slit_length)] = 1

    slit_keys = ['uniform']#,'Quarter','Half','ThreeQuarter']#,'subslit']
    ils = {}
    slit = {}
    for ib,b in enumerate(['nir','wco2','sco2','ch4']):
        #plt.figure()
        # if ib < 3:
        dl = dlam[ib]
        # else:
        #     dl = dlam[ib-1]
        ils[b] = {}
        slit[b] = {}
        for ky in slit_keys:
            for ish,sh in enumerate(['no_homog','with_homog']):
                k = ky+'_'+sh
                ils[b][k] = {}
                slit[b][k] = {}
                wave,tils,slit_grid,slit_val = makeils4(ib,inslit[ky]*100.,transfer=ish,ils_grid=dl)
                # Normalize to unit area
                ils[b][k]['value'] = tils/np.trapz(tils,wave)
                # Running integral over the first i grid points
                cdf = np.array([np.trapz(ils[b][k]['value'][:i],wave[:i]) for i in range(len(wave))])
                ils[b][k]['offset'] = np.where(cdf <= 0.5)[0][-1]
                slit[b][k]['value'] = slit_val[:]
                slit[b][k]['grid'] = slit_grid[:]
        ils[b]['grid'] = wave[:]

        if plot_ils:
            #jet = cm = plt.get_cmap('jet')
            #cNorm = colors.Normalize(vmin=0, vmax=fp_nums[-1])
            #scalarMap = cmx.ScalarMappable(norm=cNorm, cmap=jet)
            fig = plt.figure(figsize=(8,14))
            gs = GridSpec(3,1)

            # Panel 1: slit functions, as a percentage of the uniform maximum
            ax = fig.add_subplot(gs[0,0])
            inds = np.where(slit[b]['uniform_with_homog']['value'] > 0)[0]
            labels = []
            for ky in slit_keys:
                ax.plot(slit[b][ky+'_with_homog']['grid'][inds],slit[b][ky+'_with_homog']['value'][inds]/slit[b]['uniform_with_homog']['value'].max()*100)#,'--',color=scalar)
                labels.append(ky+' SH')
            plt.legend(labels,ncol=len(slit_keys)+1)
            plt.title('Band '+str(b)+' Slit Functions')

            # Panel 2: normalized ISRF where it is at least 2% of its peak
            ax = fig.add_subplot(gs[1,0])
            labels = []
            inds = np.where(ils[b]['uniform_with_homog']['value'] >= ils[b]['uniform_with_homog']['value'].max()*0.02)[0]
            for ky in slit_keys:
                ax.plot(wave[inds],ils[b][ky+'_with_homog']['value'][inds])#,ls='--',color=line_colors[ky])#scalarMap.to_rgba(ky),ls='--')
                labels.append(str(ky)+' SH')
            plt.legend(labels,ncol=2)
            plt.title('Band '+str(b)+' Normalized ISRF')

            # Panel 3: percentage difference from the uniform-slit ISRF
            ax = fig.add_subplot(gs[2,0])
            labels=[]
            for ky in slit_keys:
                ax.plot(wave[inds],(ils[b][ky+'_with_homog']['value'][inds]-ils[b]['uniform_with_homog']['value'][inds])/ils[b]['uniform_with_homog']['value'][inds].max()*100.)#,color=line_colors[ky])#scalarMap.to_rgba(ky))
                ax.set_ylabel('% Error')
                ax.set_ylim([-20,20])
                labels.append(str(ky)+' SH')
            #plt.yscale('log')
            plt.legend(labels,ncol=2)
            plt.title('Band '+str(b)+' ISRF Percentage Errors')
            plt.tight_layout()
            plt.savefig('slit_plot_band%s.png'%b,bbox_inches='tight')
            plt.show()
def add_img_masks(self):
    """Add every entry of ``emAnalysisOutput/cellImages`` as an ROI mask.

    The dataset is read in full from the file at ``self.from_path``; each
    item along its first axis is passed to ``self.ps.add_roi`` as
    ``image_mask``.
    """
    with File(self.from_path, 'r') as h5:
        masks = h5['emAnalysisOutput/cellImages'][:]
    for mask in masks:
        self.ps.add_roi(image_mask=mask)
num_unique_beams = len(SWEETSPOTS) #num_unique_beams = N beam_shape = [num_unique_beams, len(FREQS), N_POL] + list(az.shape) chunks = tuple([1, 1, N_POL] + list(az.shape)) # theta phi (and rX, rY) are 1D arrays. #theta = ((np.pi/2) - np.radians(alt)).ravel #phi = np.radians(az) theta = (np.pi / 2) - np.radians(alt.ravel()) phi = np.radians(az.ravel()) if opts.dry_run: mode = 'r' else: mode = 'w' with File(OUT_FILE, mode=mode) as df: if not opts.dry_run: # actual beam data data = df.create_dataset('beams', beam_shape, chunks=chunks, compression='lzf', shuffle=True) # various metadata df.attrs['BIBCODE'] = '2017PASA...34...62S' df.attrs['VERSION'] = '02' df['beams'].dims[0].label = 'beam' df.create_dataset('sweetspot_number', data=SWEETSPOTS) df['beams'].dims.create_scale(df['sweetspot_number']) df['beams'].dims[0].attach_scale(df['sweetspot_number'])
# -*- coding: utf-8 -*-
"""
Created on Fri Jan 16 14:42:28 2015

@author: thomasaref
"""

from pyaudio import PyAudio
from numpy import sin, pi, amax, amin, linspace, interp, zeros, log10, shape
#from Atom_HDF5 import Read_HDF5
#from Atom_Plotter import Plotter
from h5py import File

base_dir = "/Users/thomasaref/Dropbox/Current stuff/TA_enaml"
main_dir = 'Time domain, gate 5mV with flux sweep -10dbm 2013-11-04_093303'
file_name = "meas.h5"

# Open explicitly read-only: older h5py releases default to append mode,
# which can create or modify the measurement file.
with File(base_dir + "/" + main_dir + "/" + file_name, 'r') as f:
    mag = f['Mag']['I'][:]
    time = f['Mag']['time'][:]
    yok = f['Mag']['Yoko voltage'][:]

#f=Read_HDF5(read_file='/Users/thomasaref/Dropbox/Current stuff/TA_enaml/Two tone, egate n91dbm idt n127dbm 2013-10-13_213934/meas.h5')
#f=Read_HDF5(read_file='/Users/thomasaref/Dropbox/Current stuff/TA_enaml/Two tone, egate n167 dBm, Idt n127dBm 2013-10-30_083942/meas.h5')
#f=Read_HDF5(read_file='/Users/thomasaref/Dropbox/Current stuff/TA_enaml/two tone, flux vs control freq, egate n111dbm, IDT n127dbm 2013-10-12_104752/meas.h5')
#f=Read_HDF5(read_file="/Users/thomasaref/Dropbox/Current stuff/TA_enaml/
#f=Read_HDF5(main_dir="Listening, PXI source, n60 to 0 dBm, Yok n0p6 to n0p2 2013-11-14_132730")
#f=Read_HDF5(main_dir='Pulse 25ns improved gate V sweep Yoko 0p696V 2013-11-06_102036')
#f=Read_HDF5(main_dir='Time domain, gate 5mV with flux sweep -10dbm 2013-11-04_093303')
#f.open_and_read()
#print shape(mag_vec), shape(anr), shape(yok)
#print f.data['Mag'].keys()
def save_embeddings(hf: h5py.File, embeddings: FloatTensorType) -> None:
    """Store ``embeddings`` in ``hf`` under the ``EMBEDDING_DATASET`` name.

    The tensor is converted with ``.numpy()`` before being handed to
    ``create_dataset``.
    """
    as_array = embeddings.numpy()
    hf.create_dataset(EMBEDDING_DATASET, data=as_array)
def temp_emsoft_h5ebsd_file(tmpdir, request):
    """Create a dummy EMsoft h5ebsd .h5 file from input.

    The open, writable file object is yielded to the test and closed again
    afterwards (also when populating it fails).

    Parameters expected in `request`
    --------------------------------
    map_shape : tuple of ints
        Map shape to create.
    step_sizes : tuple of floats
        Step sizes in x and y coordinates in nanometres.
    rotations : np.ndarray
        A sample, smaller than the map size, of example rotations as
        rows of Euler angle triplets.
    n_top_matches : int
        Number of top matching orientations per data point kept.
    refined : bool
        Whether refined Euler angles and dot products are read.
    """
    f = File(tmpdir.join("emsoft_h5ebsd_file.h5"), mode="w")
    try:
        # Unpack parameters
        map_shape, (dy, dx), example_rotations, n_top_matches, refined = (
            request.param
        )
        ny, nx = map_shape
        map_size = ny * nx

        # Create groups used in reader
        ebsd_group = f.create_group("Scan 1/EBSD")
        data_group = ebsd_group.create_group("Data")
        header_group = ebsd_group.create_group("Header")
        phase_group = header_group.create_group("Phase/1")  # Always single phase

        # Create `header_group` datasets used in reader
        for name, data, dtype in zip(
            ["nRows", "nColumns", "Step Y", "Step X"],
            [ny, nx, dy, dx],
            [np.int32, np.int32, np.float32, np.float32],
        ):
            header_group.create_dataset(name, data=np.array([data], dtype=dtype))

        # Create `data_group` datasets, mostly quality metrics
        data_group.create_dataset(
            "X Position", data=np.tile(np.arange(nx) * dx, ny)
        )
        # Note that "Y Position" is wrongly written to their h5ebsd file by EMsoft
        data_group.create_dataset(
            "Y Position",
            data=np.tile(np.arange(nx) * dx, ny),  # Wrong
            # data=np.sort(np.tile(np.arange(ny) * dy, nx)),  # Correct
        )
        for name, shape, dtype in [
            ("AvDotProductMap", map_shape, np.int32),
            ("CI", map_size, np.float32),
            ("CIMap", map_shape, np.int32),
            ("IQ", map_size, np.float32),
            ("IQMap", map_shape, np.int32),
            ("ISM", map_size, np.float32),
            ("ISMap", map_shape, np.int32),
            ("KAM", map_shape, np.float32),
            ("OSM", map_shape, np.float32),
            ("Phase", map_size, np.uint8),
        ]:
            data_group.create_dataset(name, data=np.zeros(shape, dtype=dtype))

        # `data_group` with rotations
        # Sample as many rotations from `rotations` as `map_size`
        rot_idx = np.random.choice(np.arange(len(example_rotations)), map_size)
        rot = example_rotations[rot_idx]
        n_sampled_oris = 333227  # Cubic space group with Ncubochoric = 100
        data_group.create_dataset(
            "FZcnt", data=np.array([n_sampled_oris], dtype=np.int32)
        )
        # The same row of random values is repeated for every map point
        data_group.create_dataset(
            "TopMatchIndices",
            data=np.vstack(
                (np.random.choice(np.arange(n_sampled_oris), n_top_matches), )
                * map_size),
            dtype=np.int32,
        )
        data_group.create_dataset(
            "TopDotProductList",
            data=np.vstack(
                (np.random.random(size=n_top_matches), ) * map_size),
            dtype=np.float32,
        )
        # In degrees
        data_group.create_dataset(
            "DictionaryEulerAngles",
            data=np.column_stack(
                (np.linspace(150, 160, n_sampled_oris), ) * 3),
            dtype=np.float32,
        )

        if refined:
            data_group.create_dataset(
                "RefinedEulerAngles", data=rot.astype(np.float32)
            )
            data_group.create_dataset(
                "RefinedDotProducts", data=np.zeros(map_size, dtype=np.float32)
            )

        # Number of top matches kept
        f.create_dataset(
            "NMLparameters/EBSDIndexingNameListType/nnk",
            data=np.array([n_top_matches], dtype=np.int32),
        )

        # `phase_group`
        for name, data in [
            ("Point Group", "Cubic (Oh) [m3m]"),
            ("MaterialName", "austenite/austenite"),
            ("Lattice Constant a", "3.595"),
            ("Lattice Constant b", "3.595"),
            ("Lattice Constant c", "3.595"),
            ("Lattice Constant alpha", "90.000"),
            ("Lattice Constant beta", "90.000"),
            ("Lattice Constant gamma", "90.000"),
        ]:
            phase_group.create_dataset(
                name, data=np.array([data], dtype=np.dtype("S"))
            )

        yield f
    finally:
        # Close explicitly rather than relying on garbage collection alone to
        # release the handle; closing an already-closed file is harmless.
        f.close()
        gc.collect()
plt.rcParams.update(params)

# Some constants in cgs units
k_b_cgs = 1.38e-16  # boltzmann
m_h_cgs = 1.67e-24  # proton mass

# File containing the total energy
stats_filename = "./energy.txt"

# First snapshot
snap_filename = "coolingBox_0000.hdf5"

# Read the initial state of the gas
# NOTE(review): the handle is left open here in case code further down the
# script still reads from it -- close it once the last read is done.
f = File(snap_filename, "r")

# Read the units parameters from the snapshot
units = f["InternalCodeUnits"]
unit_mass = units.attrs["Unit mass in cgs (U_M)"]
unit_length = units.attrs["Unit length in cgs (U_L)"]
unit_time = units.attrs["Unit time in cgs (U_t)"]

# Read the adiabatic index
gamma = float(f["HydroScheme"].attrs["Adiabatic index"])


def energyUnits(u):
    """ Compute the temperature from the internal energy.

    ``u`` is scaled by ``(unit_length / unit_time) ** 2`` and then by
    ``m_h_cgs / k_b_cgs``. The input is not modified: a new value is
    returned, so passing an array no longer rescales the caller's data
    in place.
    """
    u = u * (unit_length / unit_time) ** 2
    return u * m_h_cgs / k_b_cgs
def generate_new_study_with_preprocessed_data(self):
    """Creates a new study up to the processed data for testing

    Builds a three-sample study with a sample template and a 16S prep
    template, writes a FASTA file and its HDF5 demux conversion into
    ``self.temp_dir``, and returns the "Demultiplexed" Artifact created from
    the demux file.
    """
    info = {
        "timeseries_type_id": 1,
        "metadata_complete": True,
        "mixs_compliant": True,
        "number_samples_collected": 3,
        "number_samples_promised": 3,
        "study_alias": "Test EBI",
        "study_description": "Study for testing EBI",
        "study_abstract": "Study for testing EBI",
        "emp_person_id": StudyPerson(2),
        "principal_investigator_id": StudyPerson(3),
        "lab_person_id": StudyPerson(1)
    }
    # NOTE(review): the e-mail literal below looks redacted in this copy of
    # the file; restore the real test user's address before running.
    study = Study.create(User('*****@*****.**'), "Test EBI study", [1], info)

    # Sample-level metadata
    metadata_dict = {
        'Sample1': {
            'collection_timestamp': datetime(2015, 6, 1, 7, 0, 0),
            'physical_specimen_location': 'location1',
            'taxon_id': 9606,
            'scientific_name': 'homo sapiens',
            'Description': 'Test Sample 1'
        },
        'Sample2': {
            'collection_timestamp': datetime(2015, 6, 2, 7, 0, 0),
            'physical_specimen_location': 'location1',
            'taxon_id': 9606,
            'scientific_name': 'homo sapiens',
            'Description': 'Test Sample 2'
        },
        'Sample3': {
            'collection_timestamp': datetime(2015, 6, 3, 7, 0, 0),
            'physical_specimen_location': 'location1',
            'taxon_id': 9606,
            'scientific_name': 'homo sapiens',
            'Description': 'Test Sample 3'
        }
    }
    metadata = pd.DataFrame.from_dict(metadata_dict, orient='index',
                                      dtype=str)
    SampleTemplate.create(metadata, study)

    # Prep-level metadata; the samples differ only in barcode and
    # experiment_design_description
    metadata_dict = {
        'Sample1': {
            'primer': 'GTGCCAGCMGCCGCGGTAA',
            'barcode': 'CGTAGAGCTCTC',
            'center_name': 'KnightLab',
            'platform': 'ILLUMINA',
            'instrument_model': 'Illumina MiSeq',
            'library_construction_protocol': 'Protocol ABC',
            'experiment_design_description': "Random value 1"
        },
        'Sample2': {
            'primer': 'GTGCCAGCMGCCGCGGTAA',
            'barcode': 'CGTAGAGCTCTA',
            'center_name': 'KnightLab',
            'platform': 'ILLUMINA',
            'instrument_model': 'Illumina MiSeq',
            'library_construction_protocol': 'Protocol ABC',
            'experiment_design_description': "Random value 2"
        },
        'Sample3': {
            'primer': 'GTGCCAGCMGCCGCGGTAA',
            'barcode': 'CGTAGAGCTCTT',
            'center_name': 'KnightLab',
            'platform': 'ILLUMINA',
            'instrument_model': 'Illumina MiSeq',
            'library_construction_protocol': 'Protocol ABC',
            'experiment_design_description': "Random value 3"
        },
    }
    metadata = pd.DataFrame.from_dict(metadata_dict, orient='index',
                                      dtype=str)
    pt = PrepTemplate.create(metadata, study, "16S", 'Metagenomics')

    fna_fp = join(self.temp_dir, 'seqs.fna')
    demux_fp = join(self.temp_dir, 'demux.seqs')
    with open(fna_fp, 'w') as f:
        f.write(FASTA_EXAMPLE_2.format(study.id))
    # Convert the FASTA file into the HDF5 demux file
    with File(demux_fp, 'w') as f:
        to_hdf5(fna_fp, f)

    ppd = Artifact.create([(demux_fp, 6)], "Demultiplexed", prep_template=pt)

    return ppd
def load_coach(ranking: h5py.File, coach_nick):
    """Look up a coach entry in the ranking file.

    Parameters
    ----------
    ranking
        Open ranking file; queried with ``ranking.get("/coaches/<nick>")``.
    coach_nick
        Coach nickname (case sensitive).

    Returns
    -------
    dict
        ``{'coach': <entry>, 'mu': entry['mu'], 'phi': entry['phi']}``.

    Raises
    ------
    KeyError
        If there is no entry for ``coach_nick``.
    """
    coach = ranking.get("/coaches/{}".format(coach_nick))
    # Only a missing entry (None) is an error; a present entry must not be
    # rejected because it happens to evaluate as falsy.
    if coach is None:
        # Format the message here: exceptions do not interpolate
        # logging-style arguments.
        raise KeyError(
            'load_coach {} not found. (Case sensitive)'.format(coach_nick))
    return {'coach': coach, 'mu': coach['mu'], 'phi': coach['phi']}
parser = ArgumentParser()
# Anoop provided the pose directory. Check
# /data/home/cherian/MPII/Cheng-MPII-Pose-Action/detected_poses/
parser.add_argument('pose_path', help='path to MPII pose dir (from CPM)')
# This is shipped with Cooking Activities 2. See
# /data/home/sam/mpii-cooking-2/attributesAnnotations_MPII-Cooking-2.mat (I
# think that's the right one, anyway).
parser.add_argument('attr_path', help='path to MPII attributes file (.mat)')
parser.add_argument('dest', help='path for HDF5 output file')

if __name__ == '__main__':
    args = parser.parse_args()
    seq_glob = path.join(args.pose_path, 's*-d*-cam-*')
    dir_list = glob(seq_glob)
    attr_dict = load_attrs(args.attr_path)

    with File(args.dest, 'w') as fp:
        # IDs of sequences for which load_seq returned no joints
        skipped = []
        with Pool() as pool:
            # Sequences are loaded in worker processes; imap yields results
            # in input order, so they can be paired with their directories.
            jobs = ((seq_dir, attr_dict) for seq_dir in dir_list)
            loaded = pool.imap(load_seq, jobs)
            progress = tqdm(zip(dir_list, loaded), total=len(dir_list))
            for dir_path, (joints, actions, scale) in progress:
                seq_id = path.basename(dir_path)
                if joints is None:
                    skipped.append(seq_id)
                    continue
                assert len(joints) == len(actions)
                group = '/seqs/' + seq_id
                fp[group + '/poses'] = joints
                fp[group + '/actions'] = actions
                fp[group + '/scale'] = scale
def evaluate_LCLSTao(settings, model_name='lcls_classic', input_file=None,
                     ploton=False, epics_json=None, so_lib='', verbose=False,
                     beam_archive_path=None,
                     expressions=('lat::orbit.x[end]',)):
    """Run the model through ``run_LCLSTao`` and evaluate expressions on it.

    ``expressions`` is an iterable of expression strings used to form the
    output, for example ``'beam::n_particle_loss[end]'``. Each one is
    evaluated with ``M.evaluate``; an expression that fails to evaluate is
    reported on stdout and stored as ``None``.

    If ``beam_archive_path`` is given, the beam is written to
    ``bmad_beam_<fingerprint>.h5`` in that directory (environment variables
    are expanded), and the model name, the settings and every result that is
    not ``None`` are attached to that file as attributes. The archive path is
    added to the output under ``'beam_archive'``.

    Returns
    -------
    dict
        Maps each expression to its value, plus ``'beam_archive'`` when an
        archive was written.
    """
    M = run_LCLSTao(settings=settings, model_name=model_name,
                    input_file=input_file, ploton=ploton,
                    epics_json=epics_json, so_lib=so_lib, verbose=verbose)

    output = {}

    for expression in expressions:
        try:
            val = M.evaluate(expression)
        except Exception:
            # Best effort: record the failure and carry on. Narrowed from a
            # bare except so KeyboardInterrupt / SystemExit still propagate.
            print(f'error with {expression}')
            val = None
        output[expression] = val

    if beam_archive_path:
        ff = fingerprint({
            'model_name': model_name,
            'input_file': input_file,
            'settings': settings
        })
        beam_archive_path = os.path.expandvars(beam_archive_path)
        beam_archive = os.path.abspath(
            os.path.join(beam_archive_path, f'bmad_beam_{ff}' + '.h5'))
        if verbose:
            print('Archiving beam to', beam_archive)
        M.cmd(f'write beam -at * {beam_archive}')
        output['beam_archive'] = beam_archive

        # Reopen and attach settings
        with File(beam_archive, 'r+') as h5:
            # Input
            g = h5.create_group('input')
            g.attrs['model_name'] = model_name
            #g.attrs['input_file'] = input_file

            # Settings
            g = h5.create_group('settings')
            for k, v in settings.items():
                g.attrs[k] = v

            g = h5.create_group('expressions')
            for k, v in output.items():
                # Skip only failed evaluations (None). A plain truthiness
                # test would also drop legitimate zero results and is
                # ambiguous for array values.
                if v is not None:
                    g.attrs[k] = v

    return output
class TestCopy(TestCase):
    """
        Feature: objects can be copied with ``copy``, both inside one file
        and from one file into another.
    """

    # Shared skip condition for every test in this class; the reason string
    # is kept exactly as it was on the individual decorators.
    _skip_on_old_hdf5 = ut.skipIf(
        h5py.version.hdf5_version_tuple < (1,8,9),
        "Bug in HDF5<1.8.8 prevents copying open dataset")

    def setUp(self):
        # Two independent files so cross-file copies can be exercised.
        self.f1 = File(self.mktemp(), 'w')
        self.f2 = File(self.mktemp(), 'w')

    def tearDown(self):
        # A test may close a file itself and set the attribute to None.
        if self.f1:
            self.f1.close()
        if self.f2:
            self.f2.close()

    @_skip_on_old_hdf5
    def test_copy_path_to_path(self):
        """ Source and destination both given as path strings """
        foo = self.f1.create_group('foo')
        foo['bar'] = [1,2,3]

        self.f1.copy('foo', 'baz')
        baz = self.f1['baz']
        self.assertIsInstance(baz, Group)
        self.assertArrayEqual(baz['bar'], np.array([1,2,3]))

    @_skip_on_old_hdf5
    def test_copy_path_to_group(self):
        """ Source given as a path, destination as a group object """
        foo = self.f1.create_group('foo')
        foo['bar'] = [1,2,3]
        baz = self.f1.create_group('baz')

        self.f1.copy('foo', baz)
        baz = self.f1['baz']
        self.assertIsInstance(baz, Group)
        self.assertArrayEqual(baz['foo/bar'], np.array([1,2,3]))

        self.f1.copy('foo', self.f2['/'])
        self.assertIsInstance(self.f2['/foo'], Group)
        self.assertArrayEqual(self.f2['foo/bar'], np.array([1,2,3]))

    @_skip_on_old_hdf5
    def test_copy_group_to_path(self):
        """ Source given as a group object, destination as a path """
        foo = self.f1.create_group('foo')
        foo['bar'] = [1,2,3]

        self.f1.copy(foo, 'baz')
        baz = self.f1['baz']
        self.assertIsInstance(baz, Group)
        self.assertArrayEqual(baz['bar'], np.array([1,2,3]))

        self.f2.copy(foo, 'foo')
        self.assertIsInstance(self.f2['/foo'], Group)
        self.assertArrayEqual(self.f2['foo/bar'], np.array([1,2,3]))

    @_skip_on_old_hdf5
    def test_copy_group_to_group(self):
        """ Source and destination both given as group objects """
        foo = self.f1.create_group('foo')
        foo['bar'] = [1,2,3]
        baz = self.f1.create_group('baz')

        self.f1.copy(foo, baz)
        baz = self.f1['baz']
        self.assertIsInstance(baz, Group)
        self.assertArrayEqual(baz['foo/bar'], np.array([1,2,3]))

        self.f1.copy(foo, self.f2['/'])
        self.assertIsInstance(self.f2['/foo'], Group)
        self.assertArrayEqual(self.f2['foo/bar'], np.array([1,2,3]))

    @_skip_on_old_hdf5
    def test_copy_dataset(self):
        """ Datasets can be copied by object or by path, with optional name """
        self.f1['foo'] = [1,2,3]
        foo = self.f1['foo']

        self.f1.copy(foo, 'bar')
        self.assertArrayEqual(self.f1['bar'], np.array([1,2,3]))

        self.f1.copy('foo', 'baz')
        self.assertArrayEqual(self.f1['baz'], np.array([1,2,3]))

        self.f1.copy('foo', self.f2)
        self.assertArrayEqual(self.f2['foo'], np.array([1,2,3]))

        self.f2.copy(self.f1['foo'], self.f2, 'bar')
        self.assertArrayEqual(self.f2['bar'], np.array([1,2,3]))

    @_skip_on_old_hdf5
    def test_copy_shallow(self):
        """ shallow=True copies immediate members; subgroups end up empty """
        foo = self.f1.create_group('foo')
        bar = foo.create_group('bar')
        foo['qux'] = [1,2,3]
        bar['quux'] = [4,5,6]

        self.f1.copy(foo, 'baz', shallow=True)
        baz = self.f1['baz']
        self.assertIsInstance(baz, Group)
        self.assertIsInstance(baz['bar'], Group)
        self.assertEqual(len(baz['bar']), 0)
        self.assertArrayEqual(baz['qux'], np.array([1,2,3]))

        self.f2.copy(foo, 'foo', shallow=True)
        self.assertIsInstance(self.f2['/foo'], Group)
        self.assertIsInstance(self.f2['foo/bar'], Group)
        self.assertEqual(len(self.f2['foo/bar']), 0)
        self.assertArrayEqual(self.f2['foo/qux'], np.array([1,2,3]))

    @_skip_on_old_hdf5
    def test_copy_without_attributes(self):
        """ without_attrs=True copies the data but not the attributes """
        self.f1['foo'] = [1,2,3]
        foo = self.f1['foo']
        foo.attrs['bar'] = [4,5,6]

        self.f1.copy(foo, 'baz', without_attrs=True)
        self.assertArrayEqual(self.f1['baz'], np.array([1,2,3]))
        # assertNotIn instead of a bare assert: not stripped under -O and
        # consistent with the other checks in this class.
        self.assertNotIn('bar', self.f1['baz'].attrs)

        self.f2.copy(foo, 'baz', without_attrs=True)
        self.assertArrayEqual(self.f2['baz'], np.array([1,2,3]))
        self.assertNotIn('bar', self.f2['baz'].attrs)

    @_skip_on_old_hdf5
    def test_copy_soft_links(self):
        """ expand_soft=True copies the link target, not the link """
        self.f1['bar'] = [1,2,3]
        foo = self.f1.create_group('foo')
        foo['baz'] = SoftLink('/bar')

        self.f1.copy(foo, 'qux', expand_soft=True)
        self.f2.copy(foo, 'foo', expand_soft=True)
        # Remove the link target; the copies must still hold the data.
        del self.f1['bar']

        self.assertIsInstance(self.f1['qux'], Group)
        self.assertArrayEqual(self.f1['qux/baz'], np.array([1,2,3]))

        self.assertIsInstance(self.f2['/foo'], Group)
        self.assertArrayEqual(self.f2['foo/baz'], np.array([1,2,3]))

    @_skip_on_old_hdf5
    def test_copy_external_links(self):
        """ expand_external=True copies the externally linked data """
        filename = self.f1.filename
        self.f1['foo'] = [1,2,3]
        self.f2['bar'] = ExternalLink(filename, 'foo')
        self.f1.close()
        # Mark as closed so tearDown does not try to close it again.
        self.f1 = None

        self.assertArrayEqual(self.f2['bar'], np.array([1,2,3]))

        self.f2.copy('bar', 'baz', expand_external=True)
        # Delete the external file; the copy must not depend on it.
        os.unlink(filename)
        self.assertArrayEqual(self.f2['baz'], np.array([1,2,3]))

    @_skip_on_old_hdf5
    def test_copy_refs(self):
        """ expand_refs=True copies objects pointed to by references """
        self.f1['foo'] = [1,2,3]
        self.f1['bar'] = [4,5,6]
        foo = self.f1['foo']
        bar = self.f1['bar']
        foo.attrs['bar'] = bar.ref

        self.f1.copy(foo, 'baz', expand_refs=True)
        self.assertArrayEqual(self.f1['baz'], np.array([1,2,3]))
        baz_bar = self.f1['baz'].attrs['bar']
        self.assertArrayEqual(self.f1[baz_bar], np.array([4,5,6]))
        # The reference points to a copy of bar, not to bar itself.
        self.assertNotEqual(self.f1[baz_bar].name, bar.name)

        self.f1.copy('foo', self.f2, 'baz', expand_refs=True)
        self.assertArrayEqual(self.f2['baz'], np.array([1,2,3]))
        baz_bar = self.f2['baz'].attrs['bar']
        self.assertArrayEqual(self.f2[baz_bar], np.array([4,5,6]))

        self.f1.copy('/', self.f2, 'root', expand_refs=True)
        self.assertArrayEqual(self.f2['root/foo'], np.array([1,2,3]))
        self.assertArrayEqual(self.f2['root/bar'], np.array([4,5,6]))
        foo_bar = self.f2['root/foo'].attrs['bar']
        self.assertArrayEqual(self.f2[foo_bar], np.array([4,5,6]))
        # There's only one copy of bar, which the reference points to.
        self.assertEqual(self.f2[foo_bar], self.f2['root/bar'])
def setUp(self):
    """Create the per-test file: a new `File` opened in 'w' mode at a temp path."""
    scratch_path = self.mktemp()
    self.f = File(scratch_path, 'w')
class PSPDataloader(Dataloader):
    """Load iPSP data and meta data.

    iPSP data comes as an HDF5 file with datasets organized in
    different zones. Each zone has additional meta data related
    to flow conditions and camera setting. The active zone may be
    switched by setting the `zone` attribute.

    Examples

    >>> from flowtorch import PSPDataloader
    >>> loader = PSPDataloader("0226.hdf5")
    >>> loader.zone_names
    ['Zone0000', 'Zone0001']
    >>> loader.info.keys()
    ['AngleAttackAlpha', 'DateOfRecording', 'Mach', ...]
    >>> loader.info["Mach"]
    (0.903, 'Mach number')
    >>> loader.zone_info.keys()
    ['ExposureTime', 'NumberImages', 'PSPDeviceName', 'SamplingFrequency', 'ZoneName']
    >>> loader.zone
    Zone0000
    >>> loader.zone = "Zone0001"
    >>> loader.zone_info["ZoneName"]
    HTP
    >>> cp = loader.load_snapshot("Cp", loader.write_times[:10])
    >>> cp.shape
    torch.Size([250, 75, 10])

    """

    def __init__(self, path: str, dtype: str = DEFAULT_DTYPE):
        """Create PSPDataloader instance from file path.

        :param path: path to iPSP file
        :type path: str
        :param dtype: tensor type, defaults to DEFAULT_DTYPE
        :type dtype: str, optional
        :raises FileNotFoundError: if `path` does not exist
        :raises ValueError: if the file contains no valid zones
        """
        self._path = path
        self._dtype = dtype
        if not exists(self._path):
            raise FileNotFoundError(f"Could not find file {path}")
        self._file = File(self._path, mode="r")
        self._zone_names = None
        self._info = None
        try:
            # the first zone found becomes the active one
            self._zone = self.zone_names[0]
        except Exception:
            # do not leave the file handle open if construction fails
            self._file.close()
            raise

    def _time_to_index(self, time: Union[List[str], str]) -> Union[List[int], int]:
        """Find the list index of a physical write time.

        Snapshots are stored as multidimensional arrays in the HDF5 file.
        This function finds the index in the dataset's time dimension
        corresponding to a physical write time. The index is the write
        time multiplied by the zone's sampling frequency, rounded to the
        nearest integer.

        :param time: write time or list of write times
        :type time: Union[List[str], str]
        :return: index or list of indices
        :rtype: Union[List[int], int]
        """
        freq = self.zone_info[FREQUENCY_KEY][0]
        if isinstance(time, list):
            return [int(round(float(t) * freq, 0)) for t in time]
        return int(round(float(time) * freq, 0))

    def _load_single_field(self, field_name: str, ind: Union[np.ndarray, int]) -> pt.Tensor:
        """Load a single field from the HDF5 file.

        Note that there is usually a single field available in the
        iPSP data, namely the pressure coefficient.

        :param field_name: name of the field
        :type field_name: str
        :param ind: index or array of indices to load
        :type ind: Union[np.ndarray, int]
        :return: tensor holding the field values
        :rtype: pt.Tensor
        """
        dataset = self._file[f"{self._zone}/{FIELDS[field_name]}"]
        # the last dataset dimension is indexed by snapshot
        return pt.tensor(dataset[:, :, ind], dtype=self._dtype)

    def load_snapshot(
            self, field_name: Union[List[str], str],
            time: Union[List[str], str]) -> Union[List[pt.Tensor], pt.Tensor]:
        """Load one or more fields at one or more write times.

        :param field_name: name of the field or list of field names
        :type field_name: Union[List[str], str]
        :param time: write time or list of write times
        :type time: Union[List[str], str]
        :return: a single tensor if `field_name` is a string; a list with
            one tensor per field if `field_name` is a list
        :rtype: Union[List[pt.Tensor], pt.Tensor]
        """
        check_list_or_str(field_name, "field_name")
        check_list_or_str(time, "time")
        ind = self._time_to_index(time)
        # several write times are passed on as an index array
        if isinstance(time, list):
            ind = np.array(ind)
        # load multiple fields
        if isinstance(field_name, list):
            return [self._load_single_field(name, ind) for name in field_name]
        # load single field
        return self._load_single_field(field_name, ind)

    @property
    def zone_names(self) -> List[str]:
        """Find the zone names available in the HDF5 file.

        The names are determined on first access and cached afterwards.

        :raises ValueError: if no valid zones are found
        :return: list of zone names
        :rtype: List[str]
        """
        if self._zone_names is None:
            keys = self._file.keys()
            self._zone_names = [key for key in keys if key.startswith("Zone")]
            if len(self._zone_names) < 1:
                raise ValueError(f"No valid zones in file {self._path}")
        return self._zone_names

    @property
    def zone(self) -> str:
        """Get the currently selected zone.

        :return: currently selected zone
        :rtype: str
        """
        return self._zone

    @zone.setter
    def zone(self, zone_name: str):
        """Set the active zone.

        If `zone_name` is not available, the active zone is left unchanged
        and the available zones are printed.

        :param zone_name: name of the zone
        :type zone_name: str
        """
        # go through the property rather than the private cache so the
        # check also works if the cache has not been populated yet
        available = self.zone_names
        if zone_name in available:
            self._zone = zone_name
        else:
            print(f"{zone_name} not found. Available zones are:")
            print(available)

    @property
    def info(self) -> Dict[str, tuple]:
        """Get iPSP metadata valid for entire file.

        The dictionary is built on first access and cached afterwards.

        :return: dictionary of metadata values and descriptions
        :rtype: Dict[str, tuple]
        """
        if self._info is None:
            parameters = self._file[f"{INFO_KEY}/{PARAMETER_KEY}"].attrs
            descriptions = self._file[f"{INFO_KEY}/{DESCRIPTION_KEY}"].attrs
            # a missing description falls back to an empty string
            self._info = {
                key: (parameters.get(key, ""), descriptions.get(key, ""))
                for key in parameters.keys()
            }
        return self._info

    @property
    def zone_info(self) -> Dict[str, tuple]:
        """Get iPSP metadata for the currently selected zone.

        The dictionary is rebuilt on every access, so it always reflects
        the zone that is active at that moment.

        :return: zone metadata
        :rtype: Dict[str, tuple]
        """
        parameters = self._file[f"{self._zone}/{PARAMETER_KEY}"].attrs
        descriptions = self._file[f"{self._zone}/{DESCRIPTION_KEY}"].attrs
        self._zone_info = {
            key: (parameters.get(key, ""), descriptions.get(key, ""))
            for key in parameters.keys()
        }
        return self._zone_info

    @property
    def write_times(self) -> List[str]:
        """Get the write times of the currently selected zone.

        The times are computed from the number of snapshots of the "Cp"
        field and the zone's sampling frequency.

        :return: write times as strings, rounded to 8 decimal places
        :rtype: List[str]
        """
        freq = self.zone_info[FREQUENCY_KEY][0]
        field_name = "Cp"
        n_snapshots = self._file[f"{self._zone}/{FIELDS[field_name]}"].shape[-1]
        times = [n / freq for n in range(n_snapshots)]
        # loading the time dataset directly does not always work since the dataset
        # keys sometimes have spelling mistakes, e.g, TimValues instead of TimeValues
        # times = self._file[f"{self._zone}/{TIME_KEY}"][:]
        return [str(round(t, 8)) for t in times]

    @property
    def field_names(self) -> Dict[str, List[str]]:
        """Get the available field names.

        :return: dictionary with the first write time as its only key and
            the list of all known field names as value
        :rtype: Dict[str, List[str]]
        """
        return {self.write_times[0]: list(FIELDS.keys())}

    @property
    def vertices(self) -> pt.Tensor:
        """Get the coordinates of the currently selected zone.

        :return: coordinate datasets stacked along a new last dimension
        :rtype: pt.Tensor
        """
        return pt.stack([
            pt.tensor(self._file[f"{self.zone}/{coord}"][:, :],
                      dtype=self._dtype) for coord in COORDINATE_KEYS
        ], dim=-1)

    @property
    def weights(self) -> pt.Tensor:
        """Get the weight dataset of the currently selected zone.

        :return: tensor holding the weights
        :rtype: pt.Tensor
        """
        return pt.tensor(self._file[f"{self.zone}/{WEIGHT_KEY}"][:, :],
                         dtype=self._dtype)