def make_linked_stack(self, fullname):
    """Build a stacked virtual dataset (VDS) for a single dataset key.

    This is a separate method because h5py's ``visititems`` does not
    follow external links, so each linked dataset is converted
    explicitly.

    Args:
        fullname (str): key of the source dataset to be converted into
            a stacked VDS in the output file.
    """
    # Use the first file to discover the per-file dataset shape, and
    # grab the source attributes while the file is open so the handle
    # can be closed promptly (the original leaked two open files).
    with h5.File(self.source_path_pattern % (self.file_numbers[0])) as first:
        datashape = first[fullname].shape
        source_attrs = dict(first[fullname].attrs.items())

    # One extra leading axis: one slot per source file.
    outshape = (len(self.file_numbers),) + datashape
    TGT = h5.VirtualTarget(self.target_path, fullname, shape=outshape)

    VMlist = []
    for k, fnum in enumerate(self.file_numbers):
        print(fnum)  # progress feedback
        source_path = self.source_path_pattern % (fnum)
        VSRC = h5.VirtualSource(source_path, fullname, shape=datashape)
        # Map each source file into its own slice of the stack axis.
        # np.float64 replaces the removed np.float alias.
        VM = h5.VirtualMap(VSRC, TGT[k:(k + 1):1], dtype=np.float64)
        VMlist.append(VM)

    self.outfile.create_virtual_dataset(VMlist=VMlist, fillvalue=0)

    # Copy the source attributes onto the new virtual dataset
    # (iteritems() was Python-2-only; items() works everywhere).
    for key, val in source_attrs.items():
        self.outfile[fullname].attrs[key] = val
def test_eiger_high_level(self):
    """Concatenate several eiger files along the frame axis via a VDS."""
    self.outfile = self.working_dir + 'eiger.h5'
    # Virtual target describes the full concatenated output dataset.
    TGT = h5.VirtualTarget(self.outfile, 'data', shape=(78, 200, 200))
    VMlist = []
    M_minus_1 = 0  # running frame offset into the target
    # Create the virtual dataset file.
    with h5.File(self.outfile, 'w', libver='latest') as f:
        for foo in self.fname:
            in_data = h5.File(foo)['data']
            src_shape = in_data.shape
            in_data.file.close()
            M = M_minus_1 + src_shape[0]
            VSRC = h5.VirtualSource(foo, 'data', shape=src_shape)
            # Map this file's frames into [M_minus_1, M) of the target.
            # np.float64 replaces the removed np.float alias.
            VM = h5.VirtualMap(VSRC, TGT[M_minus_1:M, :, :],
                               dtype=np.float64)
            VMlist.append(VM)
            M_minus_1 = M
        f.create_virtual_dataset(VMlist=VMlist, fillvalue=45)
        # No explicit f.close() needed: the context manager closes the
        # file on exit (the original called close() redundantly).
    f = h5.File(self.outfile, 'r')['data']
    self.assertEqual(f[10, 100, 10], 0.0)
    self.assertEqual(f[30, 100, 100], 1.0)
    self.assertEqual(f[50, 100, 100], 2.0)
    self.assertEqual(f[70, 100, 100], 3.0)
    f.file.close()
def test_percival_high_level(self):
    """Interleave frames from several files (percival layout) via a VDS."""
    self.outfile = self.working_dir + 'percival.h5'
    VM = []
    # Create the virtual dataset file.
    with h5.File(self.outfile, 'w', libver='latest') as f:
        # Virtual target is a representation of the output dataset.
        TGT = h5.VirtualTarget(
            self.outfile, 'data', shape=(79, 200, 200),
            maxshape=(None, 200, 200))
        for k, foo in enumerate(self.fname):
            VSRC = h5.VirtualSource(
                foo, 'data', shape=(20, 200, 200),
                maxshape=(None, 200, 200))
            # File k supplies every 4th frame, starting at frame k.
            # np.float64 replaces the removed np.float alias.
            VM.append(
                h5.VirtualMap(VSRC, TGT[k:79:4, :, :], dtype=np.float64))
        # Pass the fill value and the list of maps; the context manager
        # closes the file, so no explicit close is needed.
        f.create_virtual_dataset(VMlist=VM, fillvalue=-5)
    f = h5.File(self.outfile, 'r')['data']
    sh = f.shape
    line = f[:8, 100, 100]
    # Frames interleave as 0,1,2,3,0,1,2,3 over the first eight frames.
    # list(range(4)) is required on Python 3, where range is not a list.
    foo = np.array(2 * list(range(4)))
    f.file.close()
    self.assertEqual(sh, (79, 200, 200))
    np.testing.assert_array_equal(line, foo)
def test_excalibur_high_level(self):
    """Stitch detector modules vertically with gaps (excalibur layout)."""
    self.outfile = self.working_dir + 'excalibur.h5'
    f = h5.File(self.outfile, 'w', libver='latest')  # create an output file
    in_key = 'data'  # where is the data at the input?
    in_sh = h5.File(self.fname[0], 'r')[in_key].shape  # get the input shape
    dtype = h5.File(self.fname[0], 'r')[in_key].dtype  # get the datatype
    # Now generate the output shape.
    vertical_gap = 10  # pixel spacing in the vertical
    nfiles = len(self.fname)
    print("nfiles is:" + str(nfiles))
    nframes = in_sh[0]
    width = in_sh[2]
    # Modules stacked vertically with a gap between each adjacent pair.
    height = (in_sh[1] * nfiles) + (vertical_gap * (nfiles - 1))
    out_sh = (nframes, height, width)
    print(out_sh, in_sh)
    # Virtual target is a representation of the output dataset.
    # (A Python-2-only debug line, `print(offset + in_sh[1]) - offset`,
    # was removed here: on Python 3 it raises TypeError (None - int).)
    TGT = h5.VirtualTarget(self.outfile, 'data', shape=out_sh)
    offset = 0  # initial vertical offset
    VMlist = []  # place to put the maps
    for i in range(nfiles):
        print("frame_number is: %s, offset is:%s" % (str(i), offset))
        # A representation of the input dataset.
        VSRC = h5.VirtualSource(self.fname[i], in_key, shape=in_sh)
        # Map this module into its vertical band of the output.
        VM = h5.VirtualMap(
            VSRC, TGT[:, offset:(offset + in_sh[1]), :], dtype=dtype)
        offset += in_sh[1] + vertical_gap  # increment the offset
        VMlist.append(VM)  # append it to the list
    # Pass the fill value and list of maps.
    f.create_virtual_dataset(VMlist=VMlist, fillvalue=0x1)
    f.close()
    f = h5.File(self.outfile, 'r')['data']
    self.assertEqual(f[3, 100, 0], 0.0)
    self.assertEqual(f[3, 260, 0], 1.0)
    self.assertEqual(f[3, 350, 0], 3.0)
    self.assertEqual(f[3, 650, 0], 6.0)
    self.assertEqual(f[3, 900, 0], 9.0)
    self.assertEqual(f[3, 1150, 0], 12.0)
    self.assertEqual(f[3, 1450, 0], 15.0)
    f.file.close()
def create_vds_maps(self, source, vds_data):
    """Create a list of VirtualMaps of raw data to the VDS.

    Args:
        source(Source): Source attributes
        vds_data(VDS): VDS attributes

    Returns:
        list(VirtualMap): Maps describing links between raw data and VDS
    """
    raw_shape = source.frames + (source.height, source.width)
    target = h5.VirtualTarget(self.output_file, self.target_node,
                              shape=vds_data.shape)
    frame_axes = len(source.frames)

    maps = []
    offset = 0
    for idx, dataset in enumerate(self.datasets):
        raw = h5.VirtualSource(dataset, self.source_node, shape=raw_shape)
        # Each dataset occupies a band of rows: its height plus any
        # configured spacing after it.
        band_end = offset + source.height + vds_data.spacing[idx]
        index = tuple(
            [self.FULL_SLICE] * frame_axes
            + [slice(offset, band_end), self.FULL_SLICE])
        v_map = h5.VirtualMap(raw, target[index], dtype=source.dtype)
        self.logger.debug("Mapping dataset %s to %s of %s.",
                          dataset.split("/")[-1], index, self.name)
        maps.append(v_map)
        offset = band_end

    return maps
'''
https://support.hdfgroup.org/HDF5/docNewFeatures/VDS/HDF5-VDS-requirements-use-cases-2014-12-10.pdf

The eiger use case: concatenate several source files end to end along
the frame axis into one virtual dataset.
'''
import h5py as h5
import numpy as np

# Module was imported as ``h5``; the original called h5py.File (NameError).
f = h5.File("VDS.h5", 'w', libver='latest')

# Original defined ``files`` but then used ``file_names_to_concatenate``;
# use one consistent name.
file_names_to_concatenate = ['1.h5', '2.h5', '3.h5', '4.h5', '5.h5']
entry_key = 'data'  # where the data is inside of the source files.

# Get the first one's shape; all sources are assumed to match.
sh = h5.File(file_names_to_concatenate[0], 'r')[entry_key].shape

# The maps below advance by sh[0] frames per file along axis 0, so the
# output concatenates the per-file frame axes end to end.  (The original
# ``len(files, ) + sh`` was int + tuple -- a TypeError.)
TGT = h5.VirtualTarget(
    "VDS.h5", entry_key,
    shape=(len(file_names_to_concatenate) * sh[0],) + sh[1:])

VMlist = []  # was never initialised in the original
M_minus_1 = 0  # running frame offset into the target
for i in range(len(file_names_to_concatenate)):
    M = M_minus_1 + sh[0]
    VSRC = h5.VirtualSource(file_names_to_concatenate[i], entry_key, shape=sh)
    # np.float64 replaces the removed np.float alias.
    VM = h5.VirtualMap(VSRC, TGT[M_minus_1:M:1, :, :], dtype=np.float64)
    VMlist.append(VM)
    M_minus_1 = M

d = f.create_virtual_dataset(VMlist=VMlist, fillvalue=0)
f.close()
'''
Using the example refactored VDS code: stack each source file as one
slot along a new leading axis of the virtual dataset.
'''
import h5py as h5
import numpy as np

# Module was imported as ``h5``; the original called h5py.File (NameError).
f = h5.File("VDS.h5", 'w', libver='latest')

file_names_to_concatenate = ['1.h5', '2.h5', '3.h5', '4.h5', '5.h5']
entry_key = 'data'  # where the data is inside of the source files.

# Get the first one's shape; all sources are assumed to match.
sh = h5.File(file_names_to_concatenate[0], 'r')[entry_key].shape

# One slot per source file along a new leading axis.  The original had
# unbalanced parentheses here and computed int + tuple (TypeError).
TGT = h5.VirtualTarget("VDS.h5", entry_key,
                       shape=(len(file_names_to_concatenate),) + sh)

VMlist = []  # was never initialised in the original
# ``num_projections`` was undefined; iterate over the source files.
for i in range(len(file_names_to_concatenate)):
    # The original had a stray ')' after the filename argument.
    VSRC = h5.VirtualSource(file_names_to_concatenate[i], entry_key, shape=sh)
    # np.float64 replaces the removed np.float alias.
    VM = h5.VirtualMap(VSRC[:, :, :], TGT[i:(i + 1):1, :, :, :],
                       dtype=np.float64)
    VMlist.append(VM)

d = f.create_virtual_dataset(VMlist=VMlist, fillvalue=0)
f.close()
'''
https://support.hdfgroup.org/HDF5/docNewFeatures/VDS/HDF5-VDS-requirements-use-cases-2014-12-10.pdf

The dual pco edge use case: the two camera images are placed one above
the other (axis 1) in each output frame, separated by a vertical gap.
'''
import h5py as h5

gap = 10  # vertical pixel gap between the two images (was undefined)

f = h5.File('outfile.h5', 'w', libver='latest')  # create an output file.

in_sh = h5.File('raw_file_1.h5', 'r')['data'].shape  # get the input shape
dtype = h5.File('raw_file_1.h5', 'r')['data'].dtype  # get the datatype

# Representations of the two input datasets.
VSRC1 = h5.VirtualSource('raw_file_1.h5', 'data', shape=in_sh)
VSRC2 = h5.VirtualSource('raw_file_2.h5', 'data', shape=in_sh)

# The target shape doubles axis 1 (plus the gap), so the maps must band
# axis 1 as well -- the original sliced axis 0, overrunning the target.
# in_sh[2] fixes the out-of-range in_sh[3] on a 3-D shape.
TGT = h5.VirtualTarget('outfile.h5', 'data',
                       shape=(in_sh[0], 2 * in_sh[1] + gap, in_sh[2]))

VM1 = h5.VirtualMap(VSRC1, TGT[:, 0:in_sh[1], :], dtype=dtype)
VM2 = h5.VirtualMap(VSRC2,
                    TGT[:, (in_sh[1] + gap):(2 * in_sh[1] + gap), :],
                    dtype=dtype)

# Pass the fill value and list of maps.
f.create_virtual_dataset(VMlist=[VM1, VM2], fillvalue=0x1)
f.close()  # close
in_key = 'data' # where is the data at the input? dtype = h5.File('raw_file_1.h5')['data'].dtype outshape = (799, 2000, 2000) TGT = h5.VirtualTarget( 'full_time_series.h5', in_key, shape=outshape) # Virtual target is a representation of the output dataset VSRC1 = h5.VirtualSource('raw_file_1.h5', 'data', shape=(200, 2000, 2000)) #a representation of the input dataset VSRC2 = h5.VirtualSource('raw_file_2.h5', 'data', shape=(200, 2000, 2000)) #a representation of the input dataset VSRC3 = h5.VirtualSource('raw_file_3.h5', 'data', shape=(200, 2000, 2000)) #a representation of the input dataset VSRC4 = h5.VirtualSource('raw_file_4.h5', 'data', shape=(199, 2000, 2000)) #a representation of the input dataset a = TGT[0:799:4, :, :] b = TGT[1:799:4, :, :] c = TGT[2:799:4, :, :] d = TGT[3:799:4, :, :] VM1 = h5.VirtualMap(VSRC1, a, dtype=dtype) # map them with indexing VM2 = h5.VirtualMap(VSRC2, b, dtype=dtype) # map them with indexing VM3 = h5.VirtualMap(VSRC3, c, dtype=dtype) # map them with indexing VM4 = h5.VirtualMap(VSRC4, d, dtype=dtype) # map them with indexing f.create_virtual_dataset(VMlist=[VM1, VM2, VM3, VM4], fillvalue=0x1) # pass the fill value and list of maps f.close() # close
dtype = h5.File(raw_files[0], 'r')[in_key].dtype # get the datatype outkey = 'full_frame' # where should it go in the output file # now generate the output shape vertical_gap = 10 # pixels spacing in the vertical nfiles = len(raw_files) nframes = in_sh[0] width = in_sh[2] height = (in_sh[1] * nfiles) + (vertical_gap * (nfiles - 1)) out_sh = (nframes, height, width) TGT = h5.VirtualTarget( outfile, outkey, shape=out_sh) # Virtual target is a representation of the output dataset offset = 0 # initial offset VMlist = [] # place to put the maps for i in range(nfiles): print("frame_number is: %s" % str(i)) # for feedback VSRC = h5.VirtualSource( raw_files[i], in_key, shape=in_sh) #a representation of the input dataset VM = h5.VirtualMap(VSRC, TGT[:, offset:(offset + in_sh[1]), :], dtype=dtype) # map them with indexing offset += in_sh[1] + vertical_gap # increment the offset VMlist.append(VM) # append it to the list f.create_virtual_dataset(VMlist=VMlist, fillvalue=0x1) # pass the fill value and list of maps f.close() # close