def createDataset(data, axes, dest, dataunit=None, attrs=None):
    """
    This function creates an HDF5 dataset from the given arrays.

    data -> ndarray containing the data
    axes -> List of axis dictionaries of the following format:
            {'ax': array, 'name': '', 'unit': ''}
            The order of this list must correspond to the order of the
            dimensions of the data array.
    dest -> hdfPath to the destination
    dataunit -> Unit string for the data. Default is dimensionless ('').
    attrs -> Dictionary of attributes to be written to the dataset.
    """
    if dataunit is None:
        dataunit = ''

    with h5py.File(dest.file, 'w') as df:
        #require_group creates the destination group in the newly created file
        destgrp = df.require_group(dest.group)

        #Create the axes
        #Each axis is a dict {'ax': array, 'name': '', 'unit': ''}
        #These must appear in the order that the dimensions appear in the array
        dimensions = []
        for ax in axes:
            destgrp.require_dataset(ax['name'], ax['ax'].shape, np.float32,
                                    chunks=True)
            destgrp[ax['name']][:] = ax['ax']
            destgrp[ax['name']].attrs['unit'] = ax['unit']
            dimensions.append(ax['name'].encode("utf-8"))

        #Create the data dataset
        destgrp.require_dataset('data', data.shape, np.float32,
                                chunks=True, compression='gzip')
        destgrp['data'][:] = data
        destgrp['data'].attrs['unit'] = dataunit
        destgrp['data'].attrs['dimensions'] = dimensions

        if attrs is not None:
            hdftools.writeAttrs(attrs, destgrp)
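
# Minimal usage sketch for createDataset (hypothetical values). Assumes an
# hdfPath-like `dest` object exposing .file and .group, and the
# {key: (value, unit)} attrs convention used elsewhere in this module.
#
#   x = np.linspace(0, 1, 100, dtype=np.float32)
#   signal = np.sin(2 * np.pi * x).astype(np.float32)
#   axes = [{'ax': x, 'name': 'xaxis', 'unit': 'cm'}]
#   createDataset(signal, axes, dest, dataunit='V', attrs={'run': (1, '')})
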
def imgDirToRaw(run, probe, img_dir, dest, csv_dir, verbose=False):
    #Import attributes for this run/probe
    attrs = csvtools.getAllAttrs(csv_dir, run, probe)

    #Check for keys always required by this function
    req_keys = ['run_folder']
    csvtools.missingKeys(attrs, req_keys, fatal_error=True)

    run_folder = attrs['run_folder'][0]
    src = os.path.join(img_dir, run_folder)

    #Go through the directory and find all the image files
    imgfiles = []
    for root, dirs, files in os.walk(src):
        #Exclude files beginning with .
        files = [f for f in files if f[0] != '.']
        for file in files:
            imgfiles.append(os.path.join(src, file))

    #Natural-sort the images by filename
    imgfiles = natural_sort(imgfiles)
    nframes = len(imgfiles)

    #Remove the destination file if it already exists
    if os.path.exists(dest.file):
        os.remove(dest.file)

    #Create the destination file
    with h5py.File(dest.file, "a") as df:

        #Assume all images are the same shape; load the first one to figure
        #out the array dimensions
        img = PIL.Image.open(imgfiles[0])
        nxpx, nypx = img.size
        #Bands will include the names of the different channels
        nchan = len(img.getbands())

        #Create the dest group, throw an error if it already exists
        if dest.group != '/' and dest.group in df.keys():
            raise hdftools.hdfGroupExists(dest)
        grp = df.require_group(dest.group)

        #Initialize the output data array
        if 'data' in grp.keys():
            raise hdftools.hdfDatasetExists(str(dest) + ' -> ' + "'data'")

        #Create the dataset + associated attributes
        grp.require_dataset("data", (nframes, nxpx, nypx, nchan), np.float32,
                            chunks=(1, nxpx, nypx, 1),
                            compression='gzip')
        grp['data'].attrs['unit'] = ''

        #Initialize time-remaining printout
        tr = util.timeRemaining(nframes, reportevery=5)

        #Actually put the images into the file
        for i, f in enumerate(imgfiles):
            tr.updateTimeRemaining(i)
            img = np.array(PIL.Image.open(f))
            img = np.reshape(img, [nxpx, nypx, nchan])
            #Rotate images
            for chan in range(nchan):
                img[:, :, chan] = np.rot90(img[:, :, chan], k=3)
            grp['data'][i, :, :, :] = img

        dimlabels = ['frames', 'xpixels', 'ypixels', 'chan']
        grp['data'].attrs['dimensions'] = [s.encode('utf-8')
                                           for s in dimlabels]

        #Write the attrs dictionary into attributes of the new data group
        hdftools.writeAttrs(attrs, grp)

        #Create the axes
        grp.require_dataset('frames', (nframes, ), np.float32,
                            chunks=True)[:] = np.arange(nframes)
        grp['frames'].attrs['unit'] = ''

        grp.require_dataset('xpixels', (nxpx, ), np.float32,
                            chunks=True)[:] = np.arange(nxpx)
        grp['xpixels'].attrs['unit'] = ''

        grp.require_dataset('ypixels', (nypx, ), np.float32,
                            chunks=True)[:] = np.arange(nypx)
        grp['ypixels'].attrs['unit'] = ''

        grp.require_dataset('chan', (nchan, ), np.float32,
                            chunks=True)[:] = np.arange(nchan)
        grp['chan'].attrs['unit'] = ''

    return dest
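
# Example call (sketch). The paths, run number, and probe name are
# hypothetical, and hdftools.hdfPath is assumed here only as a stand-in for
# whatever constructs the dest object; the code itself only needs .file and
# .group attributes.
#
#   dest = hdftools.hdfPath('/data/run102_camera_raw.hdf5')
#   imgDirToRaw(102, 'camera', '/data/images', dest, '/data/metadata',
#               verbose=True)
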
def imgSeqRawToFull(src, dest):
    with h5py.File(src.file, 'r') as sf:

        #Get the datagroup
        srcgrp = sf[src.group]

        #Create dictionary of attributes
        attrs = hdftools.readAttrs(srcgrp)

        #Check for keys always required by this function
        req_keys = ['dt']
        csvtools.missingKeys(attrs, req_keys, fatal_error=True)

        nframes, nxpx, nypx, nchan = srcgrp['data'].shape

        #Convert dt to seconds
        dt = (attrs['dt'][0] * u.Unit(attrs['dt'][1])).to(u.s).value

        #nreps is assumed to be 1 unless otherwise set
        if 'nreps' in attrs.keys() and not np.isnan(attrs['nreps'][0]):
            nreps = int(attrs['nreps'][0])
        else:
            nreps = 1

        nti = int(nframes / nreps)

        #t0 is the time of the first frame in the set
        if 't0' in attrs.keys() and not np.isnan(attrs['t0'][0]):
            t0 = (attrs['t0'][0] * u.Unit(attrs['t0'][1])).to(u.s).value
        else:
            t0 = 0

        #Laser t0 is the time when the laser fires
        #The time array will be shifted so this time is zero
        if 'camera_delay' in attrs.keys() and not np.isnan(attrs['camera_delay'][0]):
            camera_delay = (attrs['camera_delay'][0] *
                            u.Unit(attrs['camera_delay'][1])).to(u.s).value
        else:
            camera_delay = 0

        #dxdp is the pixel spacing in cm/px
        if 'dxdp' in attrs.keys() and not np.isnan(attrs['dxdp'][0]):
            dxdp = (attrs['dxdp'][0] * u.Unit(attrs['dxdp'][1])).to(u.cm).value
        else:
            dxdp = None

        if 'dydp' in attrs.keys() and not np.isnan(attrs['dydp'][0]):
            dydp = (attrs['dydp'][0] * u.Unit(attrs['dydp'][1])).to(u.cm).value
        else:
            dydp = None

        if 'x0px' in attrs.keys() and not np.isnan(attrs['x0px'][0]):
            x0px = (attrs['x0px'][0] * u.Unit(attrs['x0px'][1])).to(u.cm).value
        else:
            x0px = 0

        if 'y0px' in attrs.keys() and not np.isnan(attrs['y0px'][0]):
            y0px = (attrs['y0px'][0] * u.Unit(attrs['y0px'][1])).to(u.cm).value
        else:
            y0px = 0

        with h5py.File(dest.file, 'a') as df:
            destgrp = df.require_group(dest.group)

            destgrp.require_dataset("data", (nti, nxpx, nypx, nreps, nchan),
                                    np.float32,
                                    chunks=(1, nxpx, nypx, 1, 1),
                                    compression='gzip')
            destgrp['data'].attrs['unit'] = ''

            #Initialize time-remaining printout
            tr = util.timeRemaining(nti, reportevery=5)

            #Actually put the images into the file
            for i in range(nti):
                tr.updateTimeRemaining(i)
                a = i * nreps
                b = (i + 1) * nreps

                #Copy, re-shape, and write the data to the new array
                arr = srcgrp['data'][a:b, ...]
                arr = np.moveaxis(arr, 0, 2)
                destgrp['data'][i, ...] = arr

            #Write the attrs dictionary into attributes of the new data group
            hdftools.writeAttrs(attrs, destgrp)

            dimlabels = []

            time = np.arange(nti) * dt + camera_delay - t0
            destgrp.require_dataset('time', (nti, ), np.float32,
                                    chunks=True)[:] = time
            destgrp['time'].attrs['unit'] = 's'
            dimlabels.append('time')

            if dxdp is not None:
                xaxis = (np.arange(nxpx) - x0px) * dxdp
                destgrp.require_dataset('xaxis', (nxpx, ), np.float32,
                                        chunks=True)[:] = xaxis
                destgrp['xaxis'].attrs['unit'] = 'cm'
                dimlabels.append('xaxis')
            else:
                destgrp.require_dataset('xpixels', (nxpx, ), np.float32,
                                        chunks=True)[:] = np.arange(nxpx)
                destgrp['xpixels'].attrs['unit'] = ''
                dimlabels.append('xpixels')

            if dydp is not None:
                yaxis = (np.arange(nypx) - y0px) * dydp
                destgrp.require_dataset('yaxis', (nypx, ), np.float32,
                                        chunks=True)[:] = yaxis
                destgrp['yaxis'].attrs['unit'] = 'cm'
                dimlabels.append('yaxis')
            else:
                destgrp.require_dataset('ypixels', (nypx, ), np.float32,
                                        chunks=True)[:] = np.arange(nypx)
                destgrp['ypixels'].attrs['unit'] = ''
                dimlabels.append('ypixels')

            destgrp.require_dataset('reps', (nreps, ), np.float32,
                                    chunks=True)[:] = np.arange(nreps)
            destgrp['reps'].attrs['unit'] = ''
            dimlabels.append('reps')

            destgrp.require_dataset('chan', (nchan, ), np.float32,
                                    chunks=True)[:] = np.arange(nchan)
            destgrp['chan'].attrs['unit'] = ''
            dimlabels.append('chan')

            destgrp['data'].attrs['dimensions'] = [s.encode('utf-8')
                                                   for s in dimlabels]
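
# Example (sketch): convert a raw image-sequence file produced by imgDirToRaw
# into the time/rep-resolved "full" layout. The paths and the
# hdftools.hdfPath constructor are hypothetical stand-ins.
#
#   raw = hdftools.hdfPath('/data/run102_camera_raw.hdf5')
#   full = hdftools.hdfPath('/data/run102_camera_full.hdf5')
#   imgSeqRawToFull(raw, full)
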
def lapdToRaw(run, probe, hdf_dir, csv_dir, dest, verbose=False,
              trange=[0, -1]):
    """
    Retrieves the appropriate metadata for a run and probe in a given data
    directory, then reads in the data using the bapsflib module and saves
    it in a new HDF5 file.

    Parameters
    ----------
    run: int
        Run number

    probe: str
        Probe name

    hdf_dir: str (path)
        Path to the directory where HDF files are stored

    csv_dir: str (path)
        Path to the directory where metadata CSVs are stored

    dest: hdfPath object
        Path to the location the data should be written out

    verbose: boolean
        Set this flag to True to enable print statements throughout the
        code, including a runtime-until-completion estimate during the
        data reading loop.

    trange: [start_index, end_index]
        Time range IN INDICES over which to load the data. -1 in the
        second index will be translated to nti-1.

    Returns
    -------
    dest (hdfPath to the destination file)
    """

    #Create a dictionary of attributes from the entire directory of CSV
    #files that applies to this probe and run
    attrs = csvtools.getAllAttrs(csv_dir, run, probe)

    #Check that some required keys are present, throw a fatal error if not
    req_keys = ['datafile', 'digitizer', 'adc']
    csvtools.missingKeys(attrs, req_keys, fatal_error=True)

    #Load some digitizer parameters we now know exist
    digitizer = attrs['digitizer'][0]
    adc = attrs['adc'][0]

    #TODO: Should this function take a data_dir and determine the filename
    #automatically, or should a source hdf file be given, leaving the program
    #that calls this one to determine the HDF file name?
    src = os.path.join(hdf_dir, attrs['datafile'][0] + '.hdf5')

    #Create an array of channels (required input for bapsflib read_data)
    #channel_arr = array of tuples of form: (digitizer, adc, board#, channel#)
    #eg. channel_arr = ('SIS crate', 'SIS 3305', 2, 1)
    #Do this in a loop, so the number of channels is flexible.
    #However, the number of 'brd' and 'chan' fields MUST match,
    #AND the keys must be of the format 'brd1', 'chan1', etc.
    channel_arr = []
    nchan = 1
    while True:
        brdstr = 'brd' + str(int(nchan))
        chanstr = 'chan' + str(int(nchan))
        if brdstr in attrs.keys() and chanstr in attrs.keys():
            #Check to make sure the channel has actual non-nan values
            if not np.isnan(attrs[brdstr][0]) and not np.isnan(attrs[chanstr][0]):
                #Append the channel to the list to be extracted
                channel_arr.append((digitizer, adc,
                                    attrs[brdstr][0], attrs[chanstr][0]))
            nchan = nchan + 1
        else:
            break

    #Determine the number of channels from the channel array
    nchan = len(channel_arr)

    #Read some variables from the src file
    with bapsf_lapd.File(src, silent=True) as sf:
        src_digitizers = sf.digitizers

        #Assume this is the digitizer: it is the only one
        digi = src_digitizers['SIS crate']

        #Assume the adc, nti, etc. are all the same on all the channels.
        #This line assumes that only one configuration is being used.
        #This is usually the case: if it is not, changes need to be made.
        daq_config = digi.active_configs[0]

        name, info = digi.construct_dataset_name(channel_arr[0][2],
                                                 channel_arr[0][3],
                                                 adc=channel_arr[0][1],
                                                 config_name=daq_config,
                                                 return_info=True)

        #Read out some digitizer parameters
        nshots = info['nshotnum']
        nti = info['nt']

    sti = trange[0]
    if trange[1] == -1:
        eti = nti - 1
    else:
        eti = trange[1]
    nti = eti - sti

    #Check if keys are provided to specify a motion list
    #controls = array of tuples of form (motion control, receptacle)
    #eg. controls = [('6K Compumotor', receptacle)]
    #note that 'receptacle' here is the receptacle NUMBER, 1-indexed!
    req_keys = ['motion_controller', 'motion_receptacle']
    if csvtools.missingKeys(attrs, req_keys, fatal_error=False):
        print("Some motion keys not found: position data will not be read out!")
        controls, pos = None, None
    else:
        motion_controller = attrs['motion_controller'][0]
        motion_receptacle = attrs['motion_receptacle'][0]
        controls = [(motion_controller, motion_receptacle)]

        #Check to see if the motion controller reported actually exists in
        #the HDF file. If not, assume the probe was stationary (motion=None).
        #If motion_controller isn't in this list, lapdReadHDF can't handle it,
        #so check that the motion controller provided is supported by the code.
        if motion_controller in ['6K Compumotor', 'NI_XZ', 'NI_XYZ']:
            pos, attrs = readPosArray(src, controls, attrs)
        else:
            controls, pos = None, None

    #Create the destination file directory if necessary
    hdftools.requireDirs(dest.file)

    #Create the destination file
    with h5py.File(dest.file, "a") as df:

        #Create the dest group, throw an error if it already exists
        if dest.group != '/' and dest.group in df.keys():
            raise hdftools.hdfGroupExists(dest)
        grp = df.require_group(dest.group)

        #Write the attrs dictionary into attributes of the new data group
        hdftools.writeAttrs(attrs, grp)

        #Open the LAPD file and copy the data over
        with bapsf_lapd.File(src, silent=True) as sf:

            #Initialize the output data array
            if 'data' in grp.keys():
                raise hdftools.hdfDatasetExists(str(dest) + ' -> ' + "'data'")

            #Create the dataset + associated attributes
            grp.require_dataset("data", (nshots, nti, nchan), np.float32,
                                chunks=(1, np.min([nti, 20000]), 1),
                                compression='gzip')
            grp['data'].attrs['unit'] = 'V'

            dimlabels = ['shots', 'time', 'chan']
            grp['data'].attrs['dimensions'] = [s.encode('utf-8')
                                               for s in dimlabels]

            #Initialize time-remaining printout
            tr = util.timeRemaining(nchan * nshots)

            #Loop through the channels and shots, reading one-by-one into
            #the output dataset
            for chan in range(nchan):
                channel = channel_arr[chan]
                if verbose:
                    print("Reading channel: " + str(chan + 1) + '/' + str(nchan))

                for shot in range(nshots):
                    if verbose:
                        tr.updateTimeRemaining(nshots * chan + shot)

                    #Read the data through bapsflib
                    data = sf.read_data(channel[2], channel[3],
                                        digitizer=channel[0],
                                        adc=channel[1],
                                        config_name=daq_config,
                                        silent=True,
                                        shotnum=shot + 1)

                    grp['data'][shot, :, chan] = data['signal'][0, sti:eti]

                    if shot == 0:
                        #Adjusted in bapsflib for clock rate, averaging, etc.
                        dt = data.dt
                        grp.attrs['dt'] = [s.encode('utf-8') for s
                                           in [str(dt.value), str(dt.unit)]]

        #If applicable, write the pos array to the file
        if pos is not None:
            grp.require_dataset('pos', (nshots, 3), np.float32)[:] = pos
            del pos

        #Create the axes
        grp.require_dataset('shots', (nshots,), np.float32,
                            chunks=True)[:] = np.arange(nshots)
        grp['shots'].attrs['unit'] = ''

        t = np.arange(nti) * dt
        grp.require_dataset('time', (nti,), np.float32,
                            chunks=True)[:] = t.value
        grp['time'].attrs['unit'] = str(t.unit)

        grp.require_dataset('chan', (nchan,), np.float32,
                            chunks=True)[:] = np.arange(nchan)
        grp['chan'].attrs['unit'] = ''

    #Clear the LAPD HDF file data from memory
    del sf, data, t

    return dest
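
# Example call (sketch). The run number, probe name, directory paths, and the
# hdftools.hdfPath constructor are hypothetical; trange is given in sample
# indices, as described in the docstring.
#
#   dest = hdftools.hdfPath('/data/run34_probe1_raw.hdf5')
#   lapdToRaw(34, 'probe1', '/data/lapd_hdf', '/data/metadata', dest,
#             verbose=True, trange=[0, -1])
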
def hrrToRaw(run, probe, hdf_dir, csv_dir, dest, verbose=False, debug=False):
    """
    Retrieves the appropriate metadata for a run and probe in a given data
    directory, then reads in the data from the HRR HDF5 output file.

    Parameters
    ----------
    run: int
        Run number

    probe: str
        Probe name

    hdf_dir: str (path)
        Path to the directory where HDF files are stored

    csv_dir: str (path)
        Path to the directory where metadata CSVs are stored

    dest: hdfPath object
        Path to the location the data should be written out

    verbose: boolean
        Set this flag to True to enable print statements throughout the
        code, including a runtime-until-completion estimate during the
        data reading loop.

    Returns
    -------
    dest (hdfPath to the destination file)
    """

    #Create a dictionary of attributes from the entire directory of CSV
    #files that applies to this probe and run
    attrs = csvtools.getAllAttrs(csv_dir, run, probe)

    #Check that some required keys are present, throw a fatal error if not
    req_keys = ['datafile']
    csvtools.missingKeys(attrs, req_keys, fatal_error=True)

    #TODO: Should this function take a data_dir and determine the filename
    #automatically, or should a source hdf file be given, leaving the program
    #that calls this one to determine the HDF file name?
    src = os.path.join(hdf_dir, attrs['datafile'][0] + '.hdf5')

    #Create an array of channels
    #channel_arr = tuples of form (resource number, channel number)
    #Indexed from 1, to match load/LAPD.py
    channel_arr = []
    nchan = 1
    while True:
        digistr = 'resource' + str(int(nchan))
        chanstr = 'chan' + str(int(nchan))
        if chanstr in attrs.keys() and digistr in attrs.keys():
            #Check to make sure the channel has actual non-nan values
            if not np.isnan(attrs[digistr][0]) and not np.isnan(attrs[chanstr][0]):
                #Append the channel to the list to be extracted
                channel_arr.append((attrs[digistr][0], attrs[chanstr][0]))
            nchan = nchan + 1
        else:
            break

    if debug:
        print("{:.0f} Data Channels found in csv".format(len(channel_arr)))

    #Create a dictionary of position channels
    #pos_chan entries are tuples of form (resource number, channel number)
    ax = ['x', 'y', 'z']
    pos_chan = {}
    nchan = 1
    for i in range(3):
        digistr = ax[i] + 'pos_resource'
        chanstr = ax[i] + 'pos_chan'
        if chanstr in attrs.keys() and digistr in attrs.keys():
            #Check to make sure the channel has actual non-nan values
            if not np.isnan(attrs[digistr][0]) and not np.isnan(attrs[chanstr][0]):
                #Append the channel to the list to be extracted
                pos_chan[ax[i]] = (attrs[digistr][0], attrs[chanstr][0])
            else:
                pos_chan[ax[i]] = None
        else:
            pos_chan[ax[i]] = None

    if debug:
        print("{:.0f} Pos Channels found in csv".format(len(pos_chan)))

    #Determine the number of channels from the channel array
    nchan = len(channel_arr)

    #Read some variables from the src file
    with h5py.File(src, 'r') as sf:
        digi_name = 'RESOURCE ' + str(channel_arr[0][0])
        if debug:
            print(digi_name)
        digigrp = sf[digi_name]

        resource_type = digigrp.attrs['RESOURCE TYPE'].decode('utf-8')

        attrs['RESOURCE ALIAS'] = (
            digigrp.attrs['RESOURCE ALIAS'].decode('utf-8'), '')
        attrs['RESOURCE DESCRIPTION'] = (
            digigrp.attrs['RESOURCE DESCRIPTION'].decode('utf-8'), '')
        attrs['RESOURCE ID'] = (digigrp.attrs['RESOURCE ID'], '')
        attrs['RESOURCE MODEL'] = (
            digigrp.attrs['RESOURCE MODEL'].decode('utf-8'), '')
        attrs['RESOURCE TYPE'] = (resource_type, '')

        resource_unit = digigrp['CHANNEL 0']['UNITS'][0].decode('utf-8')
        attrs['motion_unit'] = ('mm', '')

        if resource_type == 'SCOPE':
            dataname = 'TRACE'
            nshots = digigrp['CHANNEL 0'][dataname].shape[0]
            nti = digigrp['CHANNEL 0'][dataname].shape[1]
            dt = digigrp['CHANNEL 0'][dataname].attrs['WAVEFORM DT'] * u.s
            attrs['dt'] = [str(dt.value), str(dt.unit)]

        elif resource_type == 'MOTOR BOARD':
            dataname = 'POSITION'
            nshots = digigrp['CHANNEL 0'][dataname].shape[0]
            nti = 1

    #Create the destination file
    with h5py.File(dest.file, "a") as df:

        #Create the dest group, throw an error if it already exists
        if dest.group != '/' and dest.group in df.keys():
            raise hdftools.hdfGroupExists(dest)
        grp = df.require_group(dest.group)

        #Initialize the output data array
        if 'data' in grp.keys():
            raise hdftools.hdfDatasetExists(str(dest) + ' -> ' + "'data'")

        #Create the dataset + associated attributes
        grp.require_dataset("data", (nshots, nti, nchan), np.float32,
                            chunks=(1, np.min([nti, 20000]), 1),
                            compression='gzip')
        grp['data'].attrs['unit'] = resource_unit

        dimlabels = ['shots', 'time', 'chan']
        grp['data'].attrs['dimensions'] = [s.encode('utf-8')
                                           for s in dimlabels]

        #Open the source hdf5 file and copy the data over
        with h5py.File(src, 'r') as sf:

            #Initialize time-remaining printout
            tr = util.timeRemaining(nchan * nshots)

            #Loop through the channels and shots, reading one-by-one into
            #the output dataset
            for chan in range(nchan):
                digi_name = 'RESOURCE ' + str(channel_arr[chan][0])
                chan_name = 'CHANNEL ' + str(channel_arr[chan][1])

                if verbose:
                    print("Reading channel: " + str(chan + 1) + '/' + str(nchan))

                for shot in range(nshots):
                    if verbose:
                        tr.updateTimeRemaining(nshots * chan + shot)

                    #Read the data from the hdf5 file
                    grp['data'][shot, :, chan] = \
                        sf[digi_name][chan_name][dataname][shot, ...]

            if (pos_chan['x'] is not None or pos_chan['y'] is not None
                    or pos_chan['z'] is not None):
                grp.require_dataset('pos', (nshots, 3), np.float32)
                ax = ['x', 'y', 'z']
                unit_factor = (1.0 * u.Unit(attrs['motion_unit'][0])).to(u.cm).value
                attrs['motion_unit'] = ('cm', '')

                for i, a in enumerate(ax):
                    if pos_chan[a] is not None:
                        resname = 'RESOURCE ' + str(pos_chan[a][0])
                        channame = 'CHANNEL ' + str(int(pos_chan[a][1]))
                        try:
                            posdata = sf[resname][channame]['POSITION'][:] * unit_factor
                        except KeyError:
                            print("(!) POSITION information not found for " + resname)
                            print("If motion is not included in the run, "
                                  "set the resource to NA in the csv")
                            #Re-raise: the position data cannot be filled in
                            raise

                        #Handle the case where multiple data points were
                        #taken at each position, so npos != nshots
                        npos = posdata.size
                        if npos != nshots:
                            posdata = np.repeat(posdata, int(nshots / npos))

                        grp['pos'][:, i] = posdata
                    else:
                        grp['pos'][:, i] = np.zeros(nshots)

        #Create the axes
        grp.require_dataset('shots', (nshots, ), np.float32,
                            chunks=True)[:] = np.arange(nshots)
        grp['shots'].attrs['unit'] = ''

        grp.require_dataset('chan', (nchan, ), np.float32,
                            chunks=True)[:] = np.arange(nchan)
        grp['chan'].attrs['unit'] = ''

        if resource_type == 'SCOPE':
            t = np.arange(nti) * dt
            grp.require_dataset('time', (nti, ), np.float32,
                                chunks=True)[:] = t.value
            grp['time'].attrs['unit'] = str(t.unit)

        #Write the attrs dictionary into attributes of the new data group
        hdftools.writeAttrs(attrs, grp)

    return dest
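
# Example call (sketch). The run number, probe name, paths, and the
# hdftools.hdfPath constructor are hypothetical stand-ins.
#
#   dest = hdftools.hdfPath('/data/run7_bdot_raw.hdf5')
#   hrrToRaw(7, 'bdot', '/data/hrr_hdf', '/data/metadata', dest, verbose=True)
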
def asciiToRaw(src, dest, delimiter=None, skip_header=0,
               ax=None, axis_name=None, axis_unit=None, data_unit=None,
               run=None, probe=None, csv_dir=None):

    #Import attributes for this run/probe if metadata CSVs were provided
    attrs = None
    if csv_dir is not None and (run is not None or probe is not None):
        attrs = csvtools.getAllAttrs(csv_dir, run, probe)

    arr = np.genfromtxt(src.file, delimiter=delimiter, skip_header=skip_header)
    nelm, nchan = arr.shape

    if ax is None:
        axis = np.arange(nelm)
        axis_name = 'Indices'
    else:
        #Use the chosen column as the axis and drop it from the data array
        axis = arr[:, ax]
        axis_name = str(axis_name)
        outarr = np.zeros([nelm, nchan - 1])
        i = 0
        for j in range(nchan):
            if j != ax:
                outarr[:, i] = arr[:, j]
                i += 1
        arr = outarr
        nchan = nchan - 1

    #Remove the destination file if it already exists
    if os.path.exists(dest.file):
        os.remove(dest.file)

    #Create the destination file
    with h5py.File(dest.file, "a") as df:

        #Create the dest group, throw an error if it already exists
        if dest.group != '/' and dest.group in df.keys():
            raise hdftools.hdfGroupExists(dest)
        grp = df.require_group(dest.group)

        #Initialize the output data array
        if 'data' in grp.keys():
            raise hdftools.hdfDatasetExists(str(dest) + ' -> ' + "'data'")

        #Create the dataset + associated attributes
        grp.require_dataset("data", (nelm, nchan), np.float32,
                            chunks=True, compression='gzip')
        grp['data'].attrs['unit'] = str(data_unit)
        grp['data'][:] = arr

        dimlabels = [str(axis_name), 'chan']
        grp['data'].attrs['dimensions'] = [s.encode('utf-8')
                                           for s in dimlabels]

        #Write the attrs dictionary into attributes of the new data group
        if attrs is not None:
            hdftools.writeAttrs(attrs, grp)

        #Create the axes
        grp.require_dataset(str(axis_name), (nelm, ), np.float32,
                            chunks=True)[:] = axis
        grp[str(axis_name)].attrs['unit'] = str(axis_unit)

        grp.require_dataset('chan', (nchan, ), np.float32,
                            chunks=True)[:] = np.arange(nchan)
        grp['chan'].attrs['unit'] = ''

    return dest
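
# Example call (sketch). The file paths, column layout, and the
# hdftools.hdfPath constructor are hypothetical; here column 0 of a
# comma-delimited text file with one header row is treated as the time axis.
#
#   src = hdftools.hdfPath('/data/scope_trace.csv')
#   dest = hdftools.hdfPath('/data/scope_trace_raw.hdf5')
#   asciiToRaw(src, dest, delimiter=',', skip_header=1,
#              ax=0, axis_name='time', axis_unit='s', data_unit='V')
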