def calcT0ind(srcgrp, verbose=False):
    """
    Estimate the t0 index (start of the rising edge) of the timing diode
    signal for every shot.

    For each shot the global maximum of channel 0 is located, and t0 is taken
    to be the index of the largest np.gradient value in the part of the trace
    that precedes that maximum.

    Parameters
    ----------
        srcgrp: HDF5 group (or any mapping)
            srcgrp['data'] must have a three-element shape
            (nshots, nti, nchan) and support numpy-style indexing. Only
            channel 0 is read.

        verbose: boolean
            If True, print a header line and time-remaining updates.

    Returns
    -------
        t0ind_array: integer numpy array of length nshots
            t0 index of each shot. 0 is written for shots on which the
            gradient could not be computed (fewer than two samples before
            the maximum).

    Raises
    ------
        KeyError
            If srcgrp has no 'data' entry.
    """
    try:
        data = srcgrp['data']
        nshots, nti, nchan = data.shape
    except KeyError:
        raise KeyError(
            "tdiode.calcT0ind requires the data array to have an attribute 'shape'!"
        )

    t0ind_array = np.zeros([nshots])

    #The progress tracker is only used for verbose runs, so only build it then
    tr = util.timeRemaining(nshots) if verbose else None
    if verbose:
        print("Calculating t0 indices")

    for i in range(nshots):
        #Update time remaining
        if verbose:
            tr.updateTimeRemaining(i)

        #Read the trace from the dataset once and reuse it: every indexing
        #operation on an HDF5 dataset is a separate read from disk
        trace = data[i, :, 0]
        maxind = np.argmax(trace)
        try:
            t0ind_array[i] = np.argmax(np.gradient(trace[0:maxind]))
        except ValueError:
            #This error can be thrown on a bad shot: shot will eventually be
            #ignored, so value written here doesn't ultimately matter.
            t0ind_array[i] = 0

    return t0ind_array.astype(int)
def findBadShots(srcgrp, t0indarr, verbose=False, badshotratio=None,
                 fatal_badshot_percentage=None):
    """
    Sort the shots of a timing diode dataset into 'bad' and 'good' shots.

    For each shot the median of channel 0 is subtracted from the trace, and
    the ratio of the maximum of the result to the standard deviation of its
    first int(0.75*t0) samples is computed. Shots whose ratio is below
    badshotratio are flagged as bad.

    Parameters
    ----------
        srcgrp: HDF5 group (or any mapping)
            srcgrp['data'] must have a three-element shape
            (nshots, nti, nchan) and support numpy-style indexing. Only
            channel 0 is read.

        t0indarr: sequence of numbers, length nshots
            t0 index of each shot.

        verbose: boolean
            If True, print a header line and time-remaining updates.

        badshotratio: float
            Minimum peak-to-noise ratio of a good shot. Default (None) is 10.

        fatal_badshot_percentage: float
            Largest tolerated FRACTION (0 to 1, despite the name) of bad
            shots. Default (None) is 0.2.

    Returns
    -------
        badshots_arr, goodshots_arr: lists of shot indices

    Raises
    ------
        KeyError
            If srcgrp has no 'data' entry.
        ValueError
            If the fraction of bad shots exceeds fatal_badshot_percentage.
    """
    try:
        data = srcgrp['data']
        nshots, nti, nchan = data.shape
    except KeyError:
        raise KeyError(
            "tdiode.findBadShots requires the data array to have an attribute 'shape'!"
        )

    if badshotratio is None:
        badshotratio = 10
    if fatal_badshot_percentage is None:
        fatal_badshot_percentage = 0.2

    goodshots_arr = []
    badshots_arr = []

    #The progress tracker is only used for verbose runs, so only build it then
    tr = util.timeRemaining(nshots) if verbose else None
    if verbose:
        print("Identifying bad shots")

    for i in range(nshots):
        #Update time remaining
        if verbose:
            tr.updateTimeRemaining(i)

        #TODO: trying using the mean of the last 500 points rather than the median as the reference
        pret0 = int(t0indarr[i] * 0.75)

        #Remove offset (the trace is read from the dataset only once)
        trace = data[i, :, 0]
        arr = trace - np.median(trace)

        #Ratio is between the stdev of the early stuff and the maximum value
        early = arr[0:pret0]
        if early.size == 0:
            #No pre-t0 samples (e.g. t0 index of 0): the noise level is
            #undefined, so the shot cannot be shown to be good
            max_median_ratio = np.nan
        else:
            max_median_ratio = np.max(arr) / np.abs(np.std(early))

        #This defines a 'bad shot' where the laser diode was indistinct,
        #indicating a possible misfire. The test is written as
        #"not (ratio >= threshold)" so that a NaN ratio counts as bad: a
        #plain "ratio < threshold" is False for NaN and would silently
        #accept the shot
        if not (max_median_ratio >= badshotratio):
            badshots_arr.append(i)
        else:
            goodshots_arr.append(i)

    print("Found " + str(len(badshots_arr)) + ' bad shots')

    #Guard against division by zero on an empty dataset
    if nshots > 0 and len(badshots_arr) / nshots > fatal_badshot_percentage:
        raise ValueError("Lots of bad shots found! Bad sign! Aborting.")

    return badshots_arr, goodshots_arr
def scopeRawToFull(src, dest, port=14, tdiode_hdf=None, verbose=False,
                   debug=False, vdist=False):
    """
    Copy raw scope traces into a 'full' HDF5 dataset of shape (nshots, nti),
    optionally aligning every shot in time using pre-processed timing diode
    data.

    Parameters
    ----------
        src: hdfPath object
            Path string to a raw hdf5 file containing the data

        dest: hdfPath object
            Path string to location processed data should be written out

        port: float
            port at which the probe is located. Not used by this function.

        tdiode_hdf: hdfPath object
            Path to a raw hdf5 file containing tdiode data. If no HDF file is
            provided, no timing correction will be applied.

        verbose: boolean
            If True, print status messages and time-remaining updates.

        debug: boolean
            If True, print the index range read for every shot.

        vdist: boolean
            Not used by this function.

    Returns
    -------
       True (if executes to the end)

    Raises
    ------
        hdftools.hdfGroupExists
            If dest.group is not '/' and already exists in the destination.
        hdftools.hdfDatasetExists
            If the destination group already contains 'data'.
    """

    # ******
    # Load data from the raw HDF file
    # ******
    with h5py.File(src.file, 'r') as sf:

        #Get the datagroup
        srcgrp = sf[src.group]

        #Create dictionary of attributes
        attrs = hdftools.readAttrs(srcgrp)

        #Check for keys always required by this function
        req_keys = []

        #Process the required keys, throwing an error if any cannot be found
        csvtools.missingKeys(attrs, req_keys, fatal_error=True)

        #Extract the shape of the source data
        nshots, nti, nchan = srcgrp['data'].shape

        if verbose:
            print("Opening destination HDF file")

        #Create the destination file directory if necessary
        hdftools.requireDirs(dest.file)

        #Open the destination file
        #This exists WITHIN the open statement for the source file, so the
        #source file is open at the same time.
        with h5py.File(dest.file, 'a') as df:

            #Throw an error if this group already exists
            #(compare by value: "is not" on a string literal tests object
            #identity, which is not guaranteed to match string equality)
            if dest.group != '/' and dest.group in df.keys():
                raise hdftools.hdfGroupExists(dest)

            destgrp = df.require_group(dest.group)

            #Copy over attributes
            hdftools.copyAttrs(srcgrp, destgrp)

            #Load the time vector
            t = srcgrp['time']

            #If tdiode_hdf is set, load the pre-processed tdiode data
            if tdiode_hdf is not None:
                if verbose:
                    print("Loading tdiode array from file.")

                #Use a separate name for this handle so that the source
                #file handle (sf) is not shadowed
                with h5py.File(tdiode_hdf.file, 'r') as tf:
                    grp = tf[tdiode_hdf.group]
                    t0indarr = grp['t0indarr'][:]
                    goodshots = grp['goodshots'][:]
                    tdiode_attrs = hdftools.readAttrs(grp)

                #If tdiode was digitized with a different dt, this correction
                #will be necessary
                dt_ratio = float(attrs['dt'][0]) / float(tdiode_attrs['dt'][0])
                t0indarr = (t0indarr / dt_ratio).astype(np.int32)

                #We will remove up to max_t0shift indices from each array such that
                #the t0 indices all line up.
                min_t0ind = np.min(t0indarr[goodshots])
                max_t0shift = np.max(t0indarr[goodshots]) - min_t0ind

                #Compute new nti
                nti = nti - max_t0shift

                t = t[0:nti] - t[min_t0ind]

            #Throw an error if this dataset already exists
            if 'data' in destgrp.keys():
                raise hdftools.hdfDatasetExists(str(dest) + ' -> ' + "'data'")

            #Create the dataset 'data'
            if verbose:
                print("Creating 'data' group in destination file")

            destgrp.require_dataset('data', (nshots, nti),
                                    np.float32,
                                    chunks=(1, np.min([nti, 20000])),
                                    compression='gzip')

            #Initialize time-remaining printout
            tr = util.timeRemaining(nshots)

            if verbose:
                print("Beginning processing data shot-by-shot.")

            #Chunking data processing loop limits memory usage
            for i in range(nshots):

                #Update time remaining
                if verbose:
                    tr.updateTimeRemaining(i)

                #If a tdiode hdf was supplied, calculate the index correction
                #here
                if tdiode_hdf is not None:
                    #Calculate the starting and ending arrays for the data
                    #NOTE(review): min_t0ind is taken over good shots only, so
                    #ta may be negative for a bad shot - confirm bad shots
                    #are handled downstream
                    ta = t0indarr[i] - min_t0ind
                    tb = ta + nti
                else:
                    #By default, read in the entire dataset
                    ta = None
                    tb = None

                if debug:
                    print("Data range: [" + str(ta) + "," + str(tb) + "]")

                #Read in the data from the source file
                signal = np.squeeze(srcgrp['data'][i, ta:tb])

                destgrp['data'][i, :] = signal

            destgrp['data'].attrs['unit'] = ''

            if 'pos' in srcgrp:
                destgrp.copy(srcgrp['pos'], 'pos')

            destgrp.require_dataset('shots', (nshots, ), np.int32,
                                    chunks=True)[:] = srcgrp['shots'][:]
            destgrp['shots'].attrs['unit'] = srcgrp['shots'].attrs['unit']

            dimlabels = ['shots', 'time']

            destgrp.require_dataset('time', (nti, ), np.float32,
                                    chunks=True)[:] = t
            destgrp['time'].attrs['unit'] = 's'

            destgrp['data'].attrs['dimensions'] = [
                s.encode('utf-8') for s in dimlabels
            ]

    if verbose:
        print("End of Monochromator routine!")

    return True
def chunked_array_op(src, dest, ax, op, newshape, delsrc=False,
                     verbose=False, **args):
    """
    Apply one of the array functions to an entire dataset, breaking the
    dataset up into chunks to keep memory load low.

    src -> Source dataset (hdfpath object)
    dest -> Destination dataset path (hdfpath object)
    ax -> Name of the axis to operate on (must be one of the labels in the
    'dimensions' attribute of the source 'data' array)
    op -> Function to be applied. It is called as
    op(src_dataset, dest_dataset, slices, axis_index, args), where args is
    the dictionary of extra keyword arguments passed to this function.
    newshape -> Shape the new dataset will be after op has been applied
    delsrc -> Boolean, if true src file will be deleted after operation
    verbose -> Boolean, if true activates printouts
    **args -> Forwarded (as a single dictionary) to op

    The destination file is opened in 'w' mode, so any existing file at that
    path is truncated.
    """
    with h5py.File(src.file, 'r') as sf:
        srcgrp = sf[src.group]

        #Check source is valid dataset
        validDataset(srcgrp)

        #Load information about source dataset
        oldshape = list(srcgrp['data'].shape)
        ndim = len(oldshape)
        dimlabels = hdftools.arrToStrList(
            srcgrp['data'].attrs['dimensions'][:])

        #Get ax index
        axind = getAxInd(ax, dimlabels)

        #Decide on a chunking axis
        #Get a list of the axes indices ordered by chunk size, largest to smallest
        chunks = np.flip(np.argsort(srcgrp['data'].chunks))

        #Chose the largest one that ISN'T the chosen axis
        chunkax = chunks[0]
        if chunkax == axind:
            chunkax = chunks[1]
        print("Chunking axis: " + str(dimlabels[chunkax]))

        if srcgrp['data'].chunks[chunkax] < 2:
            print("WARNING: POSSIBLE INEFFICENT CHUNKING DETECTED!")

        #Determine optimal chunksize (along chunkax)
        ideal_chunk_elms = 1e7  #1e7*4 bytes (per float32) ~ 40mb, which is good
        #number of values per chunk ax value
        #(np.prod: the np.product alias was removed in NumPy 2.0)
        nper = np.prod(oldshape) / oldshape[chunkax]
        chunksize = int(np.round(ideal_chunk_elms / nper))
        if chunksize < 1:
            chunksize = 1

        #Determine nchunks
        nchunks = int(np.ceil(oldshape[chunkax] / chunksize))

        #Create the destination dataset
        with h5py.File(dest.file, 'w') as df:
            #The file was just truncated, so any group other than '/' has to
            #be created rather than looked up
            destgrp = df.require_group(dest.group)

            #Copy all the dataset attributes
            hdftools.copyAttrs(srcgrp, destgrp)

            #Create new data array
            destgrp.require_dataset('data', newshape, np.float32,
                                    chunks=True, compression='gzip')
            destgrp['data'].attrs['unit'] = srcgrp['data'].attrs['unit']

            if verbose:
                print(srcgrp['data'].shape)
                print(destgrp['data'].shape)

            #Copy the axes over, except the one being operated on
            #That axis will be copied over later, with changes
            for axis in dimlabels:
                if axis != ax:
                    srcgrp.copy(axis, destgrp)

            #Create the axis being operated on...unless it is now trivial
            #Newshape was determined above, and is specific to the op
            keep_axis = newshape[axind] > 1
            if keep_axis:
                destgrp.require_dataset(ax, (newshape[axind], ), np.float32,
                                        chunks=True)
                destgrp[ax].attrs['unit'] = srcgrp[ax].attrs['unit']
                new_dimlabels = dimlabels  #No changes
            else:
                #Trivial: remove this dim. list.pop() returns the removed
                #label, not the remaining list, so build the list explicitly
                #NOTE(review): newshape still has an entry for this axis -
                #confirm the label list is meant to be one shorter than the
                #data rank
                new_dimlabels = [label for j, label in enumerate(dimlabels)
                                 if j != axind]

            destgrp['data'].attrs['dimensions'] = hdftools.strListToArr(
                new_dimlabels)

            #Initialize time-remaining printout
            #Chunks are big, so report more often than usual
            tr = util.timeRemaining(nchunks, reportevery=1)

            for i in range(nchunks):
                #Update time remaining
                if verbose:
                    tr.updateTimeRemaining(i)

                sl = [slice(None)] * ndim

                #Assemble the chunk slices
                if i != nchunks - 1:
                    sl[chunkax] = slice(i * chunksize, (i + 1) * chunksize,
                                        None)
                else:
                    sl[chunkax] = slice(i * chunksize, None, None)

                #Apply op to the chunk
                op(srcgrp['data'], destgrp['data'], sl, axind, args)

            #Make the new axis by applying op to the old axis. The axis
            #dataset only exists in the destination if it was created above
            if keep_axis:
                op(srcgrp[ax], destgrp[ax], [slice(None)], 0, args)

    #If requested, delete the source file
    if delsrc:
        os.remove(src.file)
def imgDirToRaw(run, probe, img_dir, dest, csv_dir, verbose=False):
    """
    Pack a directory of image files into a single raw HDF5 dataset.

    The directory img_dir/<run_folder> (run_folder is read from the csv
    metadata) is walked recursively, files whose names begin with '.' are
    ignored, and the remaining files are natural-sorted and written as
    frames of a 'data' array with dimensions
    ['frames', 'xpixels', 'ypixels', 'chan'].

    Parameters
    ----------
        run, probe:
            Passed to csvtools.getAllAttrs to select the metadata.

        img_dir: str
            Directory that contains the run folder.

        dest: hdfPath object
            Destination file/group. An existing destination FILE IS DELETED.

        csv_dir: str
            Directory containing the metadata csv files.

        verbose: boolean
            Currently unused: progress is always reported.

    Returns
    -------
        dest

    Raises
    ------
        FileNotFoundError
            If no image files are found in the run folder.
    """

    #Import attributes for this run/probe
    attrs = csvtools.getAllAttrs(csv_dir, run, probe)

    #Check for keys always required by this function
    req_keys = ['run_folder']
    csvtools.missingKeys(attrs, req_keys, fatal_error=True)

    run_folder = attrs['run_folder'][0]
    src = os.path.join(img_dir, run_folder)

    #Go through the directory and find all the image files
    imgfiles = []
    for root, _dirs, files in os.walk(src):
        for file in files:
            #Exclude files beginning in .
            if file.startswith('.'):
                continue
            #Join with root (not src) so that files found in subdirectories
            #get a path that actually exists
            imgfiles.append(os.path.join(root, file))

    #Natural-sort the images by filename
    imgfiles = natural_sort(imgfiles)
    nframes = len(imgfiles)

    #Fail early, before the old output is removed, if there is no input
    if nframes == 0:
        raise FileNotFoundError("No image files found in " + str(src))

    #remove files if they already exist
    if os.path.exists(dest.file):
        os.remove(dest.file)

    #Create the destination file
    with h5py.File(dest.file, "a") as df:

        #Assume all images are the same shape, load the first one to figure
        #out the array dimensions
        with PIL.Image.open(imgfiles[0]) as first:
            nxpx, nypx = first.size
            #Bands will include the names of the different channels
            nchan = len(first.getbands())

        #Create the dest group, throw error if it exists
        if dest.group != '/' and dest.group in df.keys():
            raise hdftools.hdfGroupExists(dest)
        #The file is new, so the group must be created rather than looked up
        grp = df.require_group(dest.group)

        #Initialize the output data array
        if 'data' in grp.keys():
            raise hdftools.hdfDatasetExists(str(dest) + ' -> ' + "'data'")

        #Create the dataset + associated attributes
        grp.require_dataset("data", (nframes, nxpx, nypx, nchan),
                            np.float32,
                            chunks=(1, nxpx, nypx, 1),
                            compression='gzip')
        grp['data'].attrs['unit'] = ''

        #Initialize time-remaining printout
        tr = util.timeRemaining(nframes, reportevery=5)

        #Actually put the images into the file
        for i, f in enumerate(imgfiles):
            tr.updateTimeRemaining(i)

            #Close each image file as soon as its pixels have been copied
            with PIL.Image.open(f) as frame:
                img = np.array(frame)

            #The pixel array is indexed [row, column(, band)], i.e.
            #(nypx, nxpx[, nchan]); make the band axis explicit
            img = np.reshape(img, [nypx, nxpx, nchan])

            #Rotate images: rotating the first two axes of every channel at
            #once gives an array of shape (nxpx, nypx, nchan), which also
            #works for non-square images
            img = np.rot90(img, k=3, axes=(0, 1))

            grp['data'][i, :, :, :] = img

        dimlabels = ['frames', 'xpixels', 'ypixels', 'chan']
        grp['data'].attrs['dimensions'] = [
            s.encode('utf-8') for s in dimlabels
        ]

        #Write the attrs dictionary into attributes of the new data group
        hdftools.writeAttrs(attrs, grp)

        #Create the axes
        grp.require_dataset('frames', (nframes, ), np.float32,
                            chunks=True)[:] = np.arange(nframes)
        grp['frames'].attrs['unit'] = ''

        grp.require_dataset('xpixels', (nxpx, ), np.float32,
                            chunks=True)[:] = np.arange(nxpx)
        grp['xpixels'].attrs['unit'] = ''

        grp.require_dataset('ypixels', (nypx, ), np.float32,
                            chunks=True)[:] = np.arange(nypx)
        grp['ypixels'].attrs['unit'] = ''

        grp.require_dataset('chan', (nchan, ), np.float32,
                            chunks=True)[:] = np.arange(nchan)
        grp['chan'].attrs['unit'] = ''

    return dest
def fullToBmag(src, dest, verbose=False):
    """
    Compute the vector magnitude sqrt(bx^2 + by^2 + bz^2) of a gridded
    dataset and write it to a new file.

    The source 'data' array must have time as its first dimension and the
    three components as indices 0, 1, 2 of its last dimension. The output
    has the same shape, except that the last dimension has length 1.

    Parameters
    ----------
        src: hdfPath object
            Source file/group. Its 'data' array needs a 'dimensions'
            attribute containing 'time', 'xaxis', 'yaxis' and 'zaxis'.

        dest: hdfPath object
            Destination file/group. An existing destination FILE IS DELETED.

        verbose: boolean
            If True, print time-remaining updates.

    Returns
    -------
        dest

    Raises
    ------
        KeyError
            If the 'dimensions' attribute or one of the required
            dimensions/axes is missing.
    """
    with h5py.File(src.file, 'r') as sf:
        srcgrp = sf[src.group]

        try:
            dimlabels = hdftools.arrToStrList(
                srcgrp['data'].attrs['dimensions'][:])
            shape = list(srcgrp['data'].shape)
            #Same as the old shape, but now without the channels dimension...
            shape[-1] = 1
            shape = tuple(int(s) for s in shape)
        except KeyError:
            raise KeyError("bdot.fullToBmag requires the data array to have an attribute 'dimensions' and 'shape'")

        #We will duplicate the chunking on the new array, shrinking any chunk
        #dimension that would no longer fit (the channel axis now has
        #length 1)
        chunks = srcgrp['data'].chunks
        if chunks is not None:
            chunks = tuple(min(int(c), s) for c, s in zip(chunks, shape))

        try:
            #Check that the spatial dimensions and their axes exist
            for name in ("xaxis", "yaxis", "zaxis"):
                dimlabels.index(name)
                srcgrp[name]
            nti = shape[dimlabels.index("time")]
        except (KeyError, ValueError):
            #list.index raises ValueError (not KeyError) for a missing label
            raise KeyError("bdot.fullToBmag requires dimensions 'time', 'xaxis', 'yaxis', 'zaxis'")

        #Create the destination file directory if necessary
        hdftools.requireDirs(dest.file)

        #Delete destination file if it already exists
        if os.path.exists(dest.file):
            os.remove(dest.file)

        with h5py.File(dest.file, 'w') as df:
            #The file is new, so the group must be created rather than
            #looked up
            destgrp = df.require_group(dest.group)

            destgrp.require_dataset('data', shape, np.float32,
                                    chunks=chunks, compression='gzip')
            destgrp['data'].attrs['unit'] = 'G'
            destgrp['data'].attrs['dimensions'] = hdftools.strListToArr(dimlabels)

            #Copy the axes over
            for ax in dimlabels:
                if ax != 'chan':
                    srcgrp.copy(ax, destgrp)
                else:
                    #The output has a single channel
                    destgrp.require_dataset('chan', (1,), np.int32,
                                            chunks=True)[:] = [0]
                    destgrp['chan'].attrs['unit'] = ''

            #Process the data in blocks of chunksize time steps
            chunksize = 100
            nchunks = int(np.ceil(nti/chunksize))

            #Initialize time-remaining printout
            tr = util.timeRemaining(nchunks, reportevery=10)

            for i in range(nchunks):
                #Update time remaining
                if verbose:
                    tr.updateTimeRemaining(i)

                a = i*chunksize
                #The last block runs to the end of the array
                b = None if i == nchunks-1 else (i+1)*chunksize

                bx = srcgrp['data'][a:b, ..., 0]
                by = srcgrp['data'][a:b, ..., 1]
                bz = srcgrp['data'][a:b, ..., 2]

                destgrp['data'][a:b, ..., 0] = np.sqrt(np.power(bx, 2) +
                                                       np.power(by, 2) +
                                                       np.power(bz, 2))

        return dest
def fullToCurrent(src, dest, verbose=False):
    """
    Compute the current density from a gridded dataset by taking the curl of
    the data (using math.curl) one block of time steps at a time, and write
    it to a new file with the same shape, chunking and axes.

    Parameters
    ----------
        src: hdfPath object
            Source file/group. Its 'data' array needs a 'dimensions'
            attribute containing 'time', 'xaxis', 'yaxis' and 'zaxis', with
            time as the first dimension.

        dest: hdfPath object
            Destination file/group. An existing destination FILE IS DELETED.

        verbose: boolean
            If True, print time-remaining updates.

    Returns
    -------
        dest

    Raises
    ------
        KeyError
            If the 'dimensions' attribute or one of the required
            dimensions/axes is missing.
    """
    with h5py.File(src.file, 'r') as sf:
        srcgrp = sf[src.group]

        try:
            dimlabels = hdftools.arrToStrList(
                srcgrp['data'].attrs['dimensions'][:])
            shape = srcgrp['data'].shape
        except KeyError:
            raise KeyError("bdot.fullToCurrent requires the data array to have an attribute 'dimensions' and 'shape'")

        #We will duplicate the chunking on the new array
        chunks = srcgrp['data'].chunks

        try:
            xax = dimlabels.index("xaxis")
            yax = dimlabels.index("yaxis")
            zax = dimlabels.index("zaxis")

            xaxis = srcgrp['xaxis']
            yaxis = srcgrp['yaxis']
            zaxis = srcgrp['zaxis']

            nti = shape[dimlabels.index("time")]
        except (KeyError, ValueError):
            #list.index raises ValueError (not KeyError) for a missing label
            raise KeyError("bdot.fullToCurrent requires dimensions 'time', 'xaxis', 'yaxis', 'zaxis'")

        if nti > 10000:
            print("WARNING: NTI IS LARGE! CURRENT CALCULATION WILL TAKE A VERY LONG TIME!")
            print("If you have better things to do with your CPU hours, try thinning the data first.")

        #Create the destination file directory if necessary
        hdftools.requireDirs(dest.file)

        #Delete destination file if it already exists
        if os.path.exists(dest.file):
            os.remove(dest.file)

        with h5py.File(dest.file, 'w') as df:
            #The file is new, so the group must be created rather than
            #looked up
            destgrp = df.require_group(dest.group)

            destgrp.require_dataset('data', shape, np.float32,
                                    chunks=chunks, compression='gzip')
            destgrp['data'].attrs['unit'] = 'A/cm^2'
            destgrp['data'].attrs['dimensions'] = hdftools.strListToArr(dimlabels)

            #Copy the axes over
            for ax in dimlabels:
                srcgrp.copy(ax, destgrp)

            #Process the data in blocks of chunksize time steps
            chunksize = 100
            nchunks = int(np.ceil(nti/chunksize))

            #Initialize time-remaining printout
            tr = util.timeRemaining(nchunks, reportevery=10)

            for i in range(nchunks):
                #Update time remaining
                if verbose:
                    tr.updateTimeRemaining(i)

                a = i*chunksize
                #The last block runs to the end of the array
                b = None if i == nchunks-1 else (i+1)*chunksize

                #Constant is (c/4pi) * (conversion CGS -> A/m^2)*(conversion A/m^2 -> A/cm^2)
                #(2.99e10/4pi)*(3.0e-5)*(1e-4)
                #3e-5 is from the NRL formulary
                destgrp['data'][a:b, ...] = (7.138)*math.curl(
                    srcgrp['data'][a:b, ...],
                    xax, yax, zax, xaxis, yaxis, zaxis)

        return dest
def _bdotBadShotSubstitutions(nshots, badshots, pos=None, verbose=False):
    """Return an array mapping every shot index to the shot whose data should be used for it."""
    shotlist = np.arange(nshots)
    #A set makes the repeated membership tests O(1)
    bad = set(int(s) for s in badshots)

    for i in range(nshots):
        if i not in bad:
            continue

        #Identify nearest good shot before and after, without running off
        #either end of the shot list
        before_shot = i - 1
        while before_shot >= 0 and before_shot in bad:
            before_shot = before_shot - 1
        after_shot = i + 1
        while after_shot < nshots and after_shot in bad:
            after_shot = after_shot + 1

        has_before = before_shot >= 0
        has_after = after_shot < nshots

        if not has_before and not has_after:
            #There are no good shots at all: nothing to substitute
            continue
        elif not has_after:
            best_match = before_shot
        elif not has_before:
            best_match = after_shot
        elif pos is not None:
            #If position data is provided, use that to determine
            #the best match (squared distances are enough to compare)
            before_dist = np.sum(np.power(pos[i, 0:3] - pos[before_shot, 0:3], 2))
            after_dist = np.sum(np.power(pos[i, 0:3] - pos[after_shot, 0:3], 2))
            best_match = after_shot if before_dist > after_dist else before_shot
        else:
            #Otherwise just chose the earlier shot as the default
            best_match = before_shot

        if verbose:
            print("Replaced bad shot " + str(i) + " with " + str(best_match))

        #Actually make the substitution
        shotlist[i] = best_match

    return shotlist


def _bdotFixedPivotRotation(bx, by, bz, roll, pitch, yaw):
    """Apply the Tait-Bryan XZY rotation matrix (1=roll, 2=pitch, 3=yaw) to (bx, by, bz)."""
    c1, s1 = np.cos(roll), np.sin(roll)
    c2, s2 = np.cos(pitch), np.sin(pitch)
    c3, s3 = np.cos(yaw), np.sin(yaw)

    #All three outputs are computed from the UNROTATED inputs
    rot_x = c2*c3*bx - s2*by + c2*s3*bz
    rot_y = (s1*s3 + c1*c3*s2)*bx + c1*c2*by + (c1*s2*s3 - c3*s1)*bz
    rot_z = (c3*s1*s2 - c1*s3)*bx + c2*s1*by + (c1*c3 + s1*s2*s3)*bz
    return rot_x, rot_y, rot_z


def bdotRawToFull(src, dest,
                  tdiode_hdf=None, grid=False, integrate=True,
                  calibrate=True, highfreq_calibrate=True,
                  angle_correction=True, remove_offset=True,
                  replace_badshots=True,
                  verbose=False, debug=False,
                  offset_range=(0, 100), offset_rel_t0=(False, False),
                  grid_precision=0.1, strict_grid=False, strict_axes=False):
    """ Integrates bdot data, calibrates output using information about the probe.
        Corrects for probe angle based on which drive is being used.

    Parameters
    ----------
        src: hdfPath object
            Path string to a raw hdf5 file containing bdot data

        dest: hdfPath object
            Path string to location processed bdot data should be written out

        tdiode_hdf:  hdfPath object
            Path to a raw hdf5 file containing tdiode data. If no HDF file is
            provided, no timing correction will be applied.

        grid: Boolean
            If grid is true, output will be written in cartesian grid array
            format, eg. [nti, nx, ny, nz, nreps, nchan]. Otherwise, output will
            be in [nshots, nti, nchan] format. Forced to False if the source
            has no 'pos' array.

        integrate: Boolean
             If True, integrate the bdot data (usually you want to do this).
             Default is True

        calibrate: Boolean
             If True, calculate and apply ANY calibration factors
             to the data. Default is True.

        highfreq_calibrate: Boolean
             If True, calculate and apply the high frequency calibration
             factors to the data. Default is True. If the 'tau' variables are
             not specified in the probe metadata, the HF calibration won't be
             applied regardless of this keyword.

        angle_correction: Boolean
             If True, apply any angular correction between axes that is
             required based on the motion_format keyword in the metadata. If
             false, no correction is applied regardless of the metadata.
             Default is True.

        remove_offset: Boolean
            If True, remove an offset from the data based on the offset_range
            specified in those keywords. If False, data will remain as-is.
            Default is True.

        replace_badshots: Boolean
            If True, semi-intelligently replace bad shots with neighboring
            good shots. If False, data remains as-is. Only has an effect if
            tdiode_hdf is given. Default is True.

        verbose: Boolean
            If True, print status messages and time-remaining updates.

        debug: Boolean
            If True, print per-shot index ranges and rotation angles.

        offset_range: tuple
            Tuple of indices between which the average of the signal will be
            computed and subtracted from the entire signal to correct for
            offset. This should be a segment with just noise, ideally at the
            very beginning of the dataset. Longer is better.
            Default is (0,100)

        offset_rel_t0: Tuple of booleans
            If either of these values is set to True, the corresponding
            offset_range value will be taken to be relative to the t0 index
            for that each shot. For example, if t0=2000 for a shot,
            offset_range=(10, -100), and offset_rel_t0 = (False, True), then
            the offset will be computed over the range (10, 1900)

        grid_precision: float
            This is the precision to which position values will be rounded
            before being fit onto the grid. Only applies to fuzzy axis and grid
            creation.

        strict_axes: boolean
            If true, attempt to calculate axes from saved grid parameters.
            Default is false, which attempts to calculate axes by looking at
            position values.

        strict_grid: boolean
            If true, strictly unravel data onto the axes, assuming the probe
            moved in order reps->X->Y->Z. This will NOT correctly handle
            points where the probe was not at the requested position. Default
            is false, which applies "fuzzy gridding", which tries to find the
            best grid position for each shot individually.

    Returns
    -------
       True (if executes to the end)

    Raises
    ------
        ValueError
            If angle_correction is requested and the motion_format in the
            metadata is not 'fixed_pivot' or 'cartesian'.
        hdftools.hdfGroupExists
            If dest.group is not '/' and already exists in the destination.
        hdftools.hdfDatasetExists
            If the destination group already contains 'data'.
    """

    # ******
    # Load data from the raw HDF file
    # ******
    with h5py.File(src.file, 'r') as sf:

        #Get the datagroup
        srcgrp = sf[src.group]

        #Create dictionary of attributes
        attrs = hdftools.readAttrs(srcgrp)

        #Check for keys always required by this function
        req_keys = ['xarea', 'yarea', 'zarea',
                    'xatten', 'yatten', 'zatten', 'gain',
                    'xpol', 'ypol', 'zpol', 'roll',
                    'probe_origin_x', 'probe_origin_y', 'probe_origin_z',
                    'dt', 'nturns']

        if 'pos' in srcgrp:
            pos = srcgrp['pos'][:]  #Read the entire array in
            #If pos array exists, there are keywords required for that too.
            motion_format = attrs['motion_format'][0]
            if angle_correction:
                if motion_format == 'fixed_pivot':
                    req_keys = req_keys + ['rot_center_x', 'rot_center_y',
                                           'rot_center_z']
                elif motion_format != 'cartesian':
                    raise ValueError("Motion format unrecognized: " +
                                     str(attrs['motion_format'][0]))
        else:
            #If no position information is given, a single explicit position
            #is required.
            req_keys = req_keys + ['xpos', 'ypos', 'zpos']
            grid = False  #Can't grid data if there's no pos array!
            motion_format = None
            pos = None

        #Process the required keys, throwing an error if any cannot be found
        csvtools.missingKeys(attrs, req_keys, fatal_error=True)

        #Extract the shape of the source data
        nshots, nti, nchan = srcgrp['data'].shape

        #If requested by keyword, apply gridding
        if grid:
            shotgridind, xaxis, yaxis, zaxis, nx, ny, nz, nreps, nshots = postools.grid(
                pos, attrs, strict_axes=strict_axes,
                strict_grid=strict_grid, grid_precision=grid_precision,
                invert=False)

        if verbose:
            print("Opening destination HDF file")

        #Create the destination file directory if necessary
        hdftools.requireDirs(dest.file)

        #Open the destination file
        #This exists WITHIN the open statement for the source file, so the
        #source file is open at the same time.
        with h5py.File(dest.file, 'a') as df:

            #Throw an error if this group already exists
            #(compare by value: "is not" on a string literal tests object
            #identity, which is not guaranteed to match string equality)
            if dest.group != '/' and dest.group in df.keys():
                raise hdftools.hdfGroupExists(dest)

            destgrp = df.require_group(dest.group)

            #Copy over attributes
            hdftools.copyAttrs(srcgrp, destgrp)

            #Load the time vector
            t = srcgrp['time']

            #If a timing diode is being applied, correct the time vector here.
            if tdiode_hdf is not None:
                if verbose:
                    print("Loading tdiode array from file.")

                #Use a separate name for this handle so that the source
                #file handle (sf) is not shadowed
                with h5py.File(tdiode_hdf.file, 'r') as tf:
                    grp = tf[tdiode_hdf.group]
                    t0indarr = grp['t0indarr'][:]
                    goodshots = grp['goodshots'][:]
                    badshots = grp['badshots'][:]
                    tdiode_attrs = hdftools.readAttrs(grp)

                #If tdiode was digitized with a different dt, this correction
                #will be necessary
                dt_ratio = float(attrs['dt'][0])/float(tdiode_attrs['dt'][0])
                t0indarr = (t0indarr/dt_ratio).astype(np.int32)

                #We will remove up to max_t0shift indices from each array such that
                #the t0 indices all line up.
                min_t0ind = np.min(t0indarr[goodshots])
                max_t0shift = np.max(t0indarr[goodshots]) - min_t0ind

                #Compute new nti
                nti = nti - max_t0shift

                t = t[0:nti] - t[min_t0ind]

            #Throw an error if this dataset already exists
            if 'data' in destgrp.keys():
                raise hdftools.hdfDatasetExists(str(dest) + ' -> ' + "'data'")

            #Create the dataset 'data' appropriate to whether or not output
            #data will be gridded
            if verbose:
                print("Creating 'data' group in destination file")
            if grid:
                destgrp.require_dataset('data',
                                        (nti, nx, ny, nz, nreps, nchan),
                                        np.float32,
                                        chunks=(np.min([nti, 20000]), 1, 1, 1, 1, 1),
                                        compression='gzip')
            else:
                destgrp.require_dataset('data', (nshots, nti, nchan),
                                        np.float32,
                                        chunks=(1, np.min([nti, 20000]), 1),
                                        compression='gzip')

            # dt -> s
            dt = (attrs['dt'][0]*u.Unit(attrs['dt'][1])).to(u.s).value

            if calibrate:
                #First calculate the low frequency calibration factors
                calAx, calAy, calAz = calibrationFactorsLF(attrs)

                #If HF calibration factors are provided, calculate those
                #calibration constants too
                if 'xtau' in attrs.keys() and highfreq_calibrate:
                    calBx, calBy, calBz = calibrationFactorsHF(attrs)
                else:
                    calBx, calBy, calBz = None, None, None

            #This segment of code checks for bad shots and replaces them with
            #neighboring good shots. shotlist[ind] is the shot whose data is
            #written to output slot ind.
            if replace_badshots and tdiode_hdf is not None:
                shotlist = _bdotBadShotSubstitutions(nshots, badshots,
                                                     pos=pos, verbose=verbose)
            else:
                shotlist = np.arange(nshots)

            #Initialize time-remaining printout
            tr = util.timeRemaining(nshots)

            if verbose:
                print("Beginning processing data shot-by-shot.")

            #Chunking data processing loop limits memory usage
            for ind in range(nshots):

                #i == ind unless this is a bad shot
                i = shotlist[ind]

                #Update time remaining (count output slots, so the counter
                #always increases)
                if verbose:
                    tr.updateTimeRemaining(ind)

                #By default the offset range is used as given
                offset_a = offset_range[0]
                offset_b = offset_range[1]

                #If a tdiode hdf was supplied, calculate the index correction
                #here
                if tdiode_hdf is not None:
                    #Calculate the starting and ending arrays for the data.
                    #The data read below is that of shot i, so the t0 of
                    #shot i (not of the bad shot it may be replacing) sets
                    #the window.
                    #NOTE(review): min_t0ind is taken over good shots only,
                    #so ta may be negative for a bad shot that was not
                    #replaced - confirm such shots are handled downstream
                    ta = t0indarr[i] - min_t0ind
                    tb = ta + nti

                    #Calculate the range over which to calculate the offset
                    #for each shot
                    #If offset_rel_t0 is set for either point, add the t0
                    #index (relative to the start of the window)
                    if remove_offset:
                        if offset_rel_t0[0]:
                            offset_a = offset_range[0] + t0indarr[i] - ta
                        if offset_rel_t0[1]:
                            offset_b = offset_range[1] + t0indarr[i] - ta
                else:
                    #By default, read in the entire dataset
                    ta = None
                    tb = None

                if debug:
                    print("Data range: [" + str(ta) + "," + str(tb) + "]")
                    print("Offset range: [" + str(offset_a) + "," +
                          str(offset_b) + "]")

                #Read in the data from the source file
                dbx = srcgrp['data'][i, ta:tb, 0]
                dby = srcgrp['data'][i, ta:tb, 1]
                dbz = srcgrp['data'][i, ta:tb, 2]

                if remove_offset:
                    #Remove offset from each channel
                    dbx = dbx - np.mean(dbx[offset_a:offset_b])
                    dby = dby - np.mean(dby[offset_a:offset_b])
                    dbz = dbz - np.mean(dbz[offset_a:offset_b])

                if integrate:
                    #Integrate
                    bx = np.cumsum(dbx)*dt
                    by = np.cumsum(dby)*dt
                    bz = np.cumsum(dbz)*dt
                else:
                    bx, by, bz = dbx, dby, dbz

                if calibrate:
                    #Apply the high-frequency calibration if one was
                    #provided
                    if calBx is not None and highfreq_calibrate:
                        bx = bx + calBx*dbx
                        by = by + calBy*dby
                        bz = bz + calBz*dbz

                    #Apply the low-frequency calibration factors
                    #Probe pol dir is included in these
                    bx = bx*calAx
                    by = by*calAy
                    bz = bz*calAz

                #If a motion_format is set, apply the appropriate probe angle
                #correction. ('cartesian' needs no correction.)
                if motion_format == 'fixed_pivot' and angle_correction:
                    #x,y,z is the probe's current position
                    x, y, z = pos[i, :]
                    #rx, ry, rz is the location of the probe rotation point
                    #i.e. the center of the ball valve.
                    rx = attrs['rot_center_x'][0]
                    ry = attrs['rot_center_y'][0]
                    rz = attrs['rot_center_z'][0]
                    #x-rx, y-ry, z-rz is a vector pointing along the probe
                    #shaft towards the probe tip
                    #pitch is the angle of the probe shaft to the xz plane
                    pitch = np.arctan((y-ry) / (x-rx))
                    #yaw is the angle of the probe shaft to the xy plane
                    yaw = np.arctan((z-rz) / (x-rx))

                    if debug:
                        print("****Fixed Pivot Debug*******")
                        print("(x,y,z) = ({:5.2f},{:5.2f},{:5.2f})".format(x, y, z))
                        print("(rx,ry,rz) = ({:5.2f},{:5.2f},{:5.2f})".format(rx, ry, rz))
                        print("Pitch: " + str(np.degrees(pitch)))
                        print("Yaw: " + str(np.degrees(yaw)))

                    #If the probe is coming from the -X direction, its calibrated Z axis is already off by 180 degrees.
                    #This is because the probes are calibrated to match the East side of LAPD
                    if ((x-rx) > 0.0):
                        yaw = yaw + np.pi

                    #Roll is rotation of the probe about its axis, with
                    #y+ oriented up as roll=0
                    #This should be zero, unless a probe was later discovered
                    #to be incorrectly calibrated, so that the +Y mark was
                    #wrong
                    roll, unit = attrs['roll']
                    if unit != 'rad':
                        #np.radians returns the converted value: it has to
                        #be assigned for the conversion to take effect
                        roll = np.radians(roll)

                    #Matrix is the first Tait-Bryan matrix XZY from https://en.wikipedia.org/wiki/Euler_angles
                    #1 -> roll
                    #2 -> pitch
                    #3 -> yaw
                    bx, by, bz = _bdotFixedPivotRotation(bx, by, bz,
                                                         roll, pitch, yaw)

                if grid:
                    #Get location to write this datapoint from the shotgridind
                    xi = shotgridind[ind, 0]
                    yi = shotgridind[ind, 1]
                    zi = shotgridind[ind, 2]
                    repi = shotgridind[ind, 3]
                    #Write data
                    try:
                        destgrp['data'][:, xi, yi, zi, repi, 0] = bx
                        destgrp['data'][:, xi, yi, zi, repi, 1] = by
                        destgrp['data'][:, xi, yi, zi, repi, 2] = bz
                    except ValueError as e:
                        print("ERROR!")
                        print(destgrp['data'].shape)
                        print(bx.shape)
                        print([xi, yi, zi, repi])
                        raise(e)
                else:
                    #Write data
                    destgrp['data'][ind, :, 0] = bx
                    destgrp['data'][ind, :, 1] = by
                    destgrp['data'][ind, :, 2] = bz

            if verbose:
                print("Writing axes to destination file")

            #Write the axes as required by the format of the data written
            if motion_format is not None:
                #Add the other axes and things we'd like in this file
                destgrp.require_dataset('pos', (nshots, 3), np.float32,
                                        chunks=True)[:] = srcgrp['pos'][0:nshots]
                for k in srcgrp['pos'].attrs.keys():
                    destgrp['pos'].attrs[k] = srcgrp['pos'].attrs[k]

            if grid:
                dimlabels = ['time', 'xaxis', 'yaxis', 'zaxis', 'reps', 'chan']

                destgrp.require_dataset('xaxis', (nx,), np.float32,
                                        chunks=True)[:] = xaxis
                destgrp['xaxis'].attrs['unit'] = attrs['motion_unit'][0]

                destgrp.require_dataset('yaxis', (ny,), np.float32,
                                        chunks=True)[:] = yaxis
                destgrp['yaxis'].attrs['unit'] = attrs['motion_unit'][0]

                destgrp.require_dataset('zaxis', (nz,), np.float32,
                                        chunks=True)[:] = zaxis
                destgrp['zaxis'].attrs['unit'] = attrs['motion_unit'][0]

                destgrp.require_dataset('reps', (nreps,), np.int32,
                                        chunks=True)[:] = np.arange(nreps)
                destgrp['reps'].attrs['unit'] = ''
            else:
                dimlabels = ['shots', 'time', 'chan']

                destgrp.require_dataset('shots', (nshots,), np.int32,
                                        chunks=True)[:] = srcgrp['shots'][:]
                destgrp['shots'].attrs['unit'] = srcgrp['shots'].attrs['unit']

            destgrp.require_dataset('chan', (nchan,), np.int32,
                                    chunks=True)[:] = srcgrp['chan'][:]
            destgrp['chan'].attrs['unit'] = srcgrp['chan'].attrs['unit']

            destgrp.require_dataset('time', (nti,), np.float32, chunks=True)
            destgrp['time'][:] = t
            destgrp['time'].attrs['unit'] = srcgrp['time'].attrs['unit']

            if calibrate:
                destgrp['data'].attrs['unit'] = 'G'
            else:
                destgrp['data'].attrs['unit'] = 'V'

            destgrp['data'].attrs['dimensions'] = [s.encode('utf-8') for s in dimlabels]

    if verbose:
        print("End of BDOT routine!")

    return True
def isatRawToFull(src, dest, ti=1.0, mu=4.0, tdiode_hdf=None, grid=False,
                  offset_range=(0, 100), offset_rel_t0=(False, False),
                  verbose=False, debug=False, grid_precision=0.1,
                  strict_grid=False, strict_axes=False):
    """
    Converts raw isat Langmuir probe voltages into density and writes the
    result, together with its axes, to a destination HDF5 file.

    Only channel 0 of the source 'data' array is processed. Any existing
    file at dest.file is deleted before the output is written.

    Parameters
    ----------
        src: hdfPath object
            Path string to a raw hdf5 file containing data

        dest: hdfPath object
            Path string to location processed data should be written out

        ti: Ion temperature (eV). Default is 1 eV.
            NOTE: accepted for interface compatibility, but not used by the
            current implementation.

        mu: Ion mass number (m_i/m_p = mu). Default is 4.0, for Helium.
            Density scales as sqrt(mu).

        tdiode_hdf: hdfPath object
            Path to an hdf5 file containing pre-processed tdiode data
            (datasets 't0indarr' and 'goodshots' and a 'dt' attribute).
            If no HDF file is provided, no timing correction will be applied.

        grid: Boolean
            If grid is true, output will be written in cartesian grid array
            format, [nti, nx, ny, nz, nreps]. Otherwise, output will be in
            [nshots, nti] format. Forced to False if the source group has
            no 'pos' array.

        offset_range: tuple
            NOTE: accepted for interface compatibility, but not used by the
            current implementation (no offset is subtracted).

        offset_rel_t0: Tuple of booleans
            NOTE: accepted for interface compatibility, but not used by the
            current implementation.

        verbose: boolean
            Print progress messages and a time-remaining estimate.

        debug: boolean
            Print the time-index range read for every shot.

        grid_precision: float
            This is the precision to which position values will be rounded
            before being fit onto the grid. Only applies to fuzzy axis and
            grid creation. Passed through to postools.grid.

        strict_axes: boolean
            If true, attempt to calculate axes from saved grid parameters.
            Default is false, which attempts to calculate axes by looking at
            position values. Passed through to postools.grid.

        strict_grid: boolean
            If true, strictly unravel data onto the axes, assuming the probe
            moved in order reps->X->Y->Z. This will NOT correctly handle
            points where the probe was not at the requested position. Default
            is false, which applies "fuzzy gridding", which tries to find the
            best grid position for each shot individually. Passed through to
            postools.grid.

    Returns
    -------
       True (if executes to the end)

    Raises
    ------
        hdftools.hdfGroupExists
            If the destination group already exists in the destination file.

        hdftools.hdfDatasetExists
            If the destination group already contains a 'data' dataset.
    """

    # ******
    # Load data from the raw HDF file
    # ******
    with h5py.File(src.file, 'r') as sf:

        #Get the datagroup
        srcgrp = sf[src.group]

        #Create dictionary of attributes
        attrs = hdftools.readAttrs(srcgrp)

        #Check for keys always required by this function
        req_keys = ['area', 'atten', 'gain', 'resistor', 'dir', 'pol',
                    'probe_origin_x', 'probe_origin_y', 'probe_origin_z',
                    'dt']

        if 'pos' in srcgrp:
            pos = srcgrp['pos'][:]  #Read the entire array in
        else:
            #If no position information is given, a single explicit position
            #is required.
            req_keys = req_keys + ['probe_xpos', 'probe_ypos', 'probe_zpos']
            grid = False  #Can't grid data if there's no pos array!

        #Process the required keys, throwing an error if any cannot be found
        csvtools.missingKeys(attrs, req_keys, fatal_error=True)

        #Extract the shape of the source data
        nshots, nti, nchan = srcgrp['data'].shape

        #If requested by keyword, apply gridding
        #(note that postools.grid also returns an updated nshots)
        if grid:
            (shotgridind, xaxis, yaxis, zaxis,
             nx, ny, nz, nreps, nshots) = postools.grid(
                 pos, attrs,
                 strict_axes=strict_axes,
                 strict_grid=strict_grid,
                 grid_precision=grid_precision,
                 invert=False)

        if verbose:
            print("Opening destination HDF file")

        #Create the destination file directory if necessary
        hdftools.requireDirs(dest.file)

        #Open the destination file
        #This exists WITHIN the open statement for the source file, so the
        #source file is open at the same time.

        #remove files if they already exist
        if os.path.exists(dest.file):
            os.remove(dest.file)

        with h5py.File(dest.file, 'a') as df:

            #Throw an error if this group already exists
            #(string comparison must use !=, not identity)
            if dest.group != '/' and dest.group in df.keys():
                raise hdftools.hdfGroupExists(dest)

            destgrp = df.require_group(dest.group)

            #Copy over attributes
            hdftools.copyAttrs(srcgrp, destgrp)

            #Load the time vector
            t = srcgrp['time']

            #If tdiode_hdf is set, load the pre-processed tdiode data
            if tdiode_hdf is not None:
                if verbose:
                    print("Loading tdiode array from file.")

                #Use a distinct name so the source file handle 'sf' is not
                #shadowed while it is still in use.
                with h5py.File(tdiode_hdf.file, 'r') as tdiode_file:
                    grp = tdiode_file[tdiode_hdf.group]
                    t0indarr = grp['t0indarr'][:]
                    goodshots = grp['goodshots'][:]
                    tdiode_attrs = hdftools.readAttrs(grp)

                #If tdiode was digitized with a different dt, this correction
                #will be necessary
                dt_ratio = float(attrs['dt'][0]) / float(tdiode_attrs['dt'][0])
                t0indarr = (t0indarr / dt_ratio).astype(np.int32)

                #We will remove up to max_t0shift indices from each array
                #such that the t0 indices all line up.
                min_t0ind = np.min(t0indarr[goodshots])
                max_t0shift = np.max(t0indarr[goodshots]) - min_t0ind

                #Compute new nti
                nti = nti - max_t0shift

                t = t[0:nti] - t[min_t0ind]

            #Throw an error if this dataset already exists
            if 'data' in destgrp.keys():
                raise hdftools.hdfDatasetExists(str(dest) + ' -> ' + "'data'")

            #Create the dataset 'data' appropriate to whether or not output
            #data will be gridded
            if verbose:
                print("Creating 'data' group in destination file")
            if grid:
                destgrp.require_dataset(
                    'data', (nti, nx, ny, nz, nreps), np.float32,
                    chunks=(np.min([nti, 20000]), 1, 1, 1, 1),
                    compression='gzip')
            else:
                destgrp.require_dataset(
                    'data', (nshots, nti), np.float32,
                    chunks=(1, np.min([nti, 20000])),
                    compression='gzip')

            resistor = float(attrs['resistor'][0])  #Ohms
            area = (attrs['area'][0] * u.Unit(attrs['area'][1])).to(
                u.m**2).value

            #Initialize time-remaining printout
            tr = util.timeRemaining(nshots)

            if verbose:
                print("Beginning processing data shot-by-shot.")

            #Chunking data processing loop limits memory usage
            for i in range(nshots):

                #Update time remaining
                if verbose:
                    tr.updateTimeRemaining(i)

                #If a tdiode hdf was supplied, calculate the index correction
                #here
                if tdiode_hdf is not None:
                    #Calculate the starting and ending indices for the data
                    ta = t0indarr[i] - min_t0ind
                    tb = ta + nti
                else:
                    #By default, read in the entire dataset
                    ta = None
                    tb = None

                if debug:
                    print("Data range: [" + str(ta) + "," + str(tb) + "]")

                #Read in the data from the source file
                voltage = srcgrp['data'][i, ta:tb, 0]

                #Calculate density
                #Equation is 2 from this paper: 10.1119/1.2772282
                #This is valid for the regime Te~Ti, which is approx true in
                #LAPD
                density = 1.6e9 * np.sqrt(mu) * voltage / (
                    resistor * area)  #cm^-3

                if grid:
                    #Get location to write this datapoint from the shotgridind
                    xi = shotgridind[i, 0]
                    yi = shotgridind[i, 1]
                    zi = shotgridind[i, 2]
                    repi = shotgridind[i, 3]

                    #Write data
                    try:
                        destgrp['data'][:, xi, yi, zi, repi] = density
                    except ValueError:
                        #Print some context, then re-raise with the original
                        #traceback intact.
                        print("ERROR!")
                        print(destgrp['data'].shape)
                        print(voltage.shape)
                        print([xi, yi, zi, repi])
                        raise
                else:
                    #Write data
                    destgrp['data'][i, :] = density

            if verbose:
                print("Writing axes to destination file")

            if grid:
                #Add the other axes and things we'd like in this file
                destgrp.require_dataset(
                    'pos', (nshots, 3), np.float32,
                    chunks=True)[:] = srcgrp['pos'][0:nshots]
                for k in srcgrp['pos'].attrs.keys():
                    destgrp['pos'].attrs[k] = srcgrp['pos'].attrs[k]

                dimlabels = ['time', 'xaxis', 'yaxis', 'zaxis', 'reps']

                destgrp.require_dataset('xaxis', (nx, ), np.float32,
                                        chunks=True)[:] = xaxis
                destgrp['xaxis'].attrs['unit'] = attrs['motion_unit'][0]

                destgrp.require_dataset('yaxis', (ny, ), np.float32,
                                        chunks=True)[:] = yaxis
                destgrp['yaxis'].attrs['unit'] = attrs['motion_unit'][0]

                destgrp.require_dataset('zaxis', (nz, ), np.float32,
                                        chunks=True)[:] = zaxis
                destgrp['zaxis'].attrs['unit'] = attrs['motion_unit'][0]

                destgrp.require_dataset('reps', (nreps, ), np.int32,
                                        chunks=True)[:] = np.arange(nreps)
                destgrp['reps'].attrs['unit'] = ''
            else:
                dimlabels = ['shots', 'time']

                destgrp.require_dataset('shots', (nshots, ), np.int32,
                                        chunks=True)[:] = srcgrp['shots'][:]
                destgrp['shots'].attrs['unit'] = srcgrp['shots'].attrs['unit']

            #The time axis is written for both output formats
            destgrp.require_dataset('time', (nti, ), np.float32, chunks=True)
            destgrp['time'][:] = t
            destgrp['time'].attrs['unit'] = srcgrp['time'].attrs['unit']

            destgrp['data'].attrs['unit'] = 'cm^{-3}'

            destgrp['data'].attrs['dimensions'] = [
                s.encode('utf-8') for s in dimlabels
            ]

            if verbose:
                print("End of isat Langmuir routine!")

    return True
def vsweepLangmuirRawToFull(src, ndest, tdest, grid=True, verbose=False,
                            plots=False, debug=False, grid_precision=0.1,
                            strict_grid=False, strict_axes=False):
    """
    Fits swept Langmuir probe data and creates two full save files
    containing the calculated density and temperature.

    The sweeps are located with find_sweeps using the ramp voltage of the
    first shot, and every sweep is fit with vsweep_fit. Any existing files
    at ndest.file and tdest.file are deleted before output is written.

    Parameters
    ----------
        src: hdfPath object
            Path string to a raw hdf5 file containing swept Langmuir probe
            data. There should be two channels: the first being the Langmuir
            current and the second being the ramp voltage.

        ndest: hdfPath object
            Path string to location processed density data is written out

        tdest: hdfPath object
            Path string to location processed temperature data is written out

        grid: Boolean
            If grid is true, shots at the same grid position are averaged
            and output is written in cartesian grid array format,
            [nti, nx, ny, nz]. Otherwise all shots are averaged together and
            output is written in [nti] format. In both cases an 'error'
            dataset with one extra trailing axis of length 5 is written.
            Forced to False if the source group has no 'pos' array.

        verbose: boolean
            Print progress messages and a time-remaining estimate.

        plots: boolean
            Passed to find_sweeps.

        debug: boolean
            Accepted for interface compatibility; not used by the current
            implementation.

        grid_precision: float
            This is the precision to which position values will be rounded
            before being fit onto the grid. Only applies to fuzzy axis and
            grid creation. Passed through to postools.grid.

        strict_axes: boolean
            If true, attempt to calculate axes from saved grid parameters.
            Default is false, which attempts to calculate axes by looking at
            position values. Passed through to postools.grid.

        strict_grid: boolean
            If true, strictly unravel data onto the axes, assuming the probe
            moved in order reps->X->Y->Z. This will NOT correctly handle
            points where the probe was not at the requested position. Default
            is false, which applies "fuzzy gridding", which tries to find the
            best grid position for each shot individually. Passed through to
            postools.grid.

    Returns
    -------
       True (if executes to the end)

    Raises
    ------
        hdftools.hdfGroupExists
            If a destination group already exists in its destination file.

        hdftools.hdfDatasetExists
            If a destination group already contains a 'data' dataset.
    """

    # ******
    # Load data from the raw HDF file
    # ******
    with h5py.File(src.file, 'r') as sf:

        #Get the datagroup
        srcgrp = sf[src.group]

        #Create dictionary of attributes
        attrs = hdftools.readAttrs(srcgrp)

        #Check for keys always required by this function
        req_keys = ['area', 'resistor', 'gain', 'atten',
                    'ramp_gain', 'ramp_atten',
                    'probe_origin_x', 'probe_origin_y', 'probe_origin_z']

        if 'pos' in srcgrp:
            pos = srcgrp['pos'][:]  #Read the entire array in
        else:
            #If no position information is given, a single explicit position
            #is required.
            req_keys = req_keys + ['xpos', 'ypos', 'zpos']
            grid = False  #Can't grid data if there's no pos array!

        #Process the required keys, throwing an error if any cannot be found
        csvtools.missingKeys(attrs, req_keys, fatal_error=True)

        #Extract the shape of the source data
        nshots, nti, nchan = srcgrp['data'].shape

        #If requested by keyword, apply gridding
        #(invert=True: result is indexed by grid position, see use below)
        if grid:
            (shotlist, xaxis, yaxis, zaxis,
             nx, ny, nz, nreps, nshots) = postools.grid(
                 pos, attrs,
                 strict_axes=strict_axes,
                 strict_grid=strict_grid,
                 grid_precision=grid_precision,
                 invert=True)

        if verbose:
            print("Opening destination HDF files")

        #Open the destination files
        #This exists WITHIN the open statement for the source file, so the
        #source file is open at the same time.

        #Check if the output files exist already: if so, delete them
        if os.path.exists(ndest.file):
            os.remove(ndest.file)
        if os.path.exists(tdest.file):
            os.remove(tdest.file)

        with h5py.File(ndest.file, 'a') as ndf, \
                h5py.File(tdest.file, 'a') as tdf:

            #Throw an error if this group already exists
            if ndest.group != '/' and ndest.group in ndf.keys():
                raise hdftools.hdfGroupExists(ndest)
            if tdest.group != '/' and tdest.group in tdf.keys():
                raise hdftools.hdfGroupExists(tdest)

            ndestgrp = ndf.require_group(ndest.group)
            tdestgrp = tdf.require_group(tdest.group)

            grps = [ndestgrp, tdestgrp]

            for grp in grps:
                hdftools.copyAttrs(srcgrp, grp)

                #Throw an error if this dataset already exists
                if 'data' in grp.keys():
                    raise hdftools.hdfDatasetExists(
                        str(grp) + ' -> ' + "'data'")

            #Determine the time vector and nti
            #Assume first shot is representative of the vramp
            vramp = srcgrp['data'][0, :, 1]
            time = srcgrp['time'][:]

            peaktimes, start, end = find_sweeps(time, vramp, plots=plots)
            #From here on there is one output time point per sweep
            nti = len(peaktimes)
            time = peaktimes

            #Create the dataset 'data' appropriate to whether or not output
            #data will be gridded
            if verbose:
                print("Creating 'data' group in destination file")
            if grid:
                data_shape = (nti, nx, ny, nz)
                error_shape = (nti, nx, ny, nz, 5)
            else:
                data_shape = (nti, )
                error_shape = (nti, 5)

            for grp in grps:
                grp.require_dataset('data', data_shape, np.float32,
                                    chunks=True, compression='gzip')
                grp.require_dataset('error', error_shape, np.float32,
                                    chunks=True, compression='gzip')

            probe_gain = float(attrs['gain'][0])
            probe_atten = float(attrs['atten'][0])
            ramp_gain = float(attrs['ramp_gain'][0])
            ramp_atten = float(attrs['ramp_atten'][0])
            resistor = float(attrs['resistor'][0])  #Ohms
            area = (attrs['area'][0] * u.Unit(attrs['area'][1])).to(
                u.cm**2).value

            probe_calib = np.power(10, probe_atten / 20.0) / probe_gain
            ramp_calib = np.power(10, ramp_atten / 20.0) / ramp_gain

            if grid:
                #Initialize time-remaining printout
                tr = util.timeRemaining(nx * ny * nz, reportevery=20)

                for xi in range(nx):
                    for yi in range(ny):
                        for zi in range(nz):
                            #Flattened position index for the progress
                            #printout
                            i = zi + yi * nz + xi * nz * ny
                            if verbose:
                                tr.updateTimeRemaining(i)

                            #Shot indices recorded at this grid position
                            s = shotlist[xi, yi, zi, :]

                            current = (srcgrp['data'][s, :, 0]
                                       * probe_calib / resistor)
                            vramp = srcgrp['data'][s, :, 1] * ramp_calib

                            #Average over shots
                            current = np.mean(current, axis=0)
                            vramp = np.mean(vramp, axis=0)

                            for ti in range(nti):
                                a = int(start[ti])
                                b = int(end[ti])

                                (vpp, kTe, esat, vthe,
                                 density, error) = vsweep_fit(
                                     vramp[a:b], current[a:b],
                                     esat_range=None, exp_range=None,
                                     plots=False, area=area)

                                ndestgrp['data'][ti, xi, yi, zi] = density
                                ndestgrp['error'][ti, xi, yi, zi, :] = error

                                tdestgrp['data'][ti, xi, yi, zi] = kTe
                                tdestgrp['error'][ti, xi, yi, zi, :] = error

            else:
                #Not gridded: average over every shot in the file
                current = srcgrp['data'][:, :, 0]
                current = np.mean(current, axis=0) * probe_calib / resistor

                vramp = srcgrp['data'][:, :, 1]
                vramp = np.mean(vramp, axis=0) * ramp_calib

                for ti in range(nti):
                    #Cast to int exactly as the gridded branch does, so the
                    #slice below is valid even for non-integer index types.
                    a = int(start[ti])
                    b = int(end[ti])

                    vpp, kTe, esat, vthe, density, error = vsweep_fit(
                        vramp[a:b], current[a:b],
                        esat_range=None, exp_range=None,
                        plots=False, area=area)

                    ndestgrp['data'][ti] = density
                    ndestgrp['error'][ti, :] = error

                    tdestgrp['data'][ti] = kTe
                    tdestgrp['error'][ti, :] = error

            for grp in grps:
                #Write the axes as required by the format of the data written
                if grid:
                    grp.require_dataset(
                        'pos', (nshots, 3), np.float32,
                        chunks=True)[:] = srcgrp['pos'][0:nshots]
                    for k in srcgrp['pos'].attrs.keys():
                        grp['pos'].attrs[k] = srcgrp['pos'].attrs[k]

                    dimlabels = ['time', 'xaxis', 'yaxis', 'zaxis']

                    grp.require_dataset('xaxis', (nx, ), np.float32,
                                        chunks=True)[:] = xaxis
                    grp['xaxis'].attrs['unit'] = attrs['motion_unit'][0]

                    grp.require_dataset('yaxis', (ny, ), np.float32,
                                        chunks=True)[:] = yaxis
                    grp['yaxis'].attrs['unit'] = attrs['motion_unit'][0]

                    grp.require_dataset('zaxis', (nz, ), np.float32,
                                        chunks=True)[:] = zaxis
                    grp['zaxis'].attrs['unit'] = attrs['motion_unit'][0]
                else:
                    dimlabels = ['time']

                #The time axis is written for both output formats
                grp.require_dataset('time', (nti, ), np.float32, chunks=True)
                grp['time'][:] = time
                grp['time'].attrs['unit'] = srcgrp['time'].attrs['unit']

            #Units of the two outputs
            ndestgrp['data'].attrs['unit'] = 'cm^{-3}'
            tdestgrp['data'].attrs['unit'] = 'eV'

            ndestgrp['data'].attrs['dimensions'] = [
                s.encode('utf-8') for s in dimlabels
            ]
            tdestgrp['data'].attrs['dimensions'] = [
                s.encode('utf-8') for s in dimlabels
            ]

            if verbose:
                print("End of Sweept Langmuir routine!")

    return True
def imgSeqRawToFull(src, dest):
    """
    Reshapes a raw image sequence into a [time, x, y, reps, chan] array and
    writes it, together with its axes, to the destination HDF5 file.

    The source 'data' array has shape [nframes, nxpx, nypx, nchan]. Frames
    are taken in consecutive blocks of nreps, one block per time step, so
    nti = int(nframes / nreps).

    Parameters
    ----------
        src: hdfPath object
            Path string to a raw hdf5 file containing the image sequence.
            The group must carry a 'dt' attribute. The attributes 'nreps',
            't0', 'camera_delay', 'dxdp', 'dydp', 'x0px' and 'y0px' are
            optional; missing or NaN values fall back to defaults
            (nreps=1, t0=0, camera_delay=0, x0px=0, y0px=0, and pixel
            index axes instead of calibrated axes when dxdp/dydp are absent).

        dest: hdfPath object
            Path string to location processed data should be written out

    Returns
    -------
        None
    """
    with h5py.File(src.file, 'r') as sf:

        #Get the datagroup
        srcgrp = sf[src.group]

        #Create dictionary of attributes
        attrs = hdftools.readAttrs(srcgrp)

        #Check for keys always required by this function
        req_keys = ['dt']
        csvtools.missingKeys(attrs, req_keys, fatal_error=True)

        nframes, nxpx, nypx, nchan = srcgrp['data'].shape

        #Convert dt
        dt = (attrs['dt'][0] * u.Unit(attrs['dt'][1])).to(u.s).value

        #Reps is assumed to be 1 unless otherwise set
        #Cast to int: nreps is used as a dataset dimension and as a slice
        #bound below, both of which require an integer.
        if 'nreps' in attrs.keys() and not np.isnan(attrs['nreps'][0]):
            nreps = int(attrs['nreps'][0])
        else:
            nreps = 1

        nti = int(nframes / nreps)

        #t0 is the time of the first frame in the set
        if 't0' in attrs.keys() and not np.isnan(attrs['t0'][0]):
            t0 = (attrs['t0'][0] * u.Unit(attrs['t0'][1])).to(u.s).value
        else:
            t0 = 0

        #Laser t0 is the time when the laser fires
        #Time array will be shifted so this time is zero
        if 'camera_delay' in attrs.keys() and not np.isnan(
                attrs['camera_delay'][0]):
            camera_delay = (attrs['camera_delay'][0]
                            * u.Unit(attrs['camera_delay'][1])).to(u.s).value
        else:
            camera_delay = 0

        #dxdp is the pixel spacing in cm/px
        if 'dxdp' in attrs.keys() and not np.isnan(attrs['dxdp'][0]):
            dxdp = (attrs['dxdp'][0] * u.Unit(attrs['dxdp'][1])).to(u.cm).value
        else:
            dxdp = None

        if 'dydp' in attrs.keys() and not np.isnan(attrs['dydp'][0]):
            dydp = (attrs['dydp'][0] * u.Unit(attrs['dydp'][1])).to(u.cm).value
        else:
            dydp = None

        #NOTE(review): x0px/y0px are converted to cm here but subtracted
        #from a pixel index below -- confirm the intended unit.
        if 'x0px' in attrs.keys() and not np.isnan(attrs['x0px'][0]):
            x0px = (attrs['x0px'][0] * u.Unit(attrs['x0px'][1])).to(u.cm).value
        else:
            x0px = 0

        if 'y0px' in attrs.keys() and not np.isnan(attrs['y0px'][0]):
            y0px = (attrs['y0px'][0] * u.Unit(attrs['y0px'][1])).to(u.cm).value
        else:
            y0px = 0

        #The destination is opened while the source file is still open,
        #because frames are copied directly from srcgrp.
        with h5py.File(dest.file, 'a') as df:
            destgrp = df.require_group(dest.group)

            destgrp.require_dataset("data", (nti, nxpx, nypx, nreps, nchan),
                                    np.float32,
                                    chunks=(1, nxpx, nypx, 1, 1),
                                    compression='gzip')
            destgrp['data'].attrs['unit'] = ''

            #Initialize time-remaining printout
            tr = util.timeRemaining(nti, reportevery=5)

            #Actually put the images into the file
            for i in range(nti):
                tr.updateTimeRemaining(i)

                #Frames a:b are the nreps repetitions of time step i
                a = i * nreps
                b = (i + 1) * nreps

                #Copy, re-shape, and write data to array
                #[nreps, nxpx, nypx, nchan] -> [nxpx, nypx, nreps, nchan]
                arr = srcgrp['data'][a:b, ...]
                arr = np.moveaxis(arr, 0, 2)
                destgrp['data'][i, ...] = arr

            #Write the attrs dictionary into attributes of the new data group
            hdftools.writeAttrs(attrs, destgrp)

            dimlabels = []

            time = np.arange(nti) * dt + camera_delay - t0
            destgrp.require_dataset('time', (nti, ), np.float32,
                                    chunks=True)[:] = time
            destgrp['time'].attrs['unit'] = 's'
            dimlabels.append('time')

            if dxdp is not None:
                xaxis = (np.arange(nxpx) - x0px) * dxdp
                destgrp.require_dataset('xaxis', (nxpx, ), np.float32,
                                        chunks=True)[:] = xaxis
                destgrp['xaxis'].attrs['unit'] = 'cm'
                dimlabels.append('xaxis')
            else:
                destgrp.require_dataset('xpixels', (nxpx, ), np.float32,
                                        chunks=True)[:] = np.arange(nxpx)
                destgrp['xpixels'].attrs['unit'] = ''
                dimlabels.append('xpixels')

            if dydp is not None:
                yaxis = (np.arange(nypx) - y0px) * dydp
                destgrp.require_dataset('yaxis', (nypx, ), np.float32,
                                        chunks=True)[:] = yaxis
                destgrp['yaxis'].attrs['unit'] = 'cm'
                dimlabels.append('yaxis')
            else:
                destgrp.require_dataset('ypixels', (nypx, ), np.float32,
                                        chunks=True)[:] = np.arange(nypx)
                destgrp['ypixels'].attrs['unit'] = ''
                dimlabels.append('ypixels')

            destgrp.require_dataset('reps', (nreps, ), np.float32,
                                    chunks=True)[:] = np.arange(nreps)
            destgrp['reps'].attrs['unit'] = ''
            dimlabels.append('reps')

            destgrp.require_dataset('chan', (nchan, ), np.float32,
                                    chunks=True)[:] = np.arange(nchan)
            destgrp['chan'].attrs['unit'] = ''
            dimlabels.append('chan')

            destgrp['data'].attrs['dimensions'] = [
                s.encode('utf-8') for s in dimlabels
            ]
def lapdToRaw(run, probe, hdf_dir, csv_dir, dest, verbose=False,
              trange=(0, -1)):
    """
    Retrieves the appropriate metadata for a run and probe in a given data
    directory, then reads in the data using the bapsflib module and saves
    it in a new hdf5 file.

    Parameters
    ----------
        run: int
            Run number

        probe: str
            Probe name

        hdf_dir: str (path)
            Path to the directory where HDF files are stored

        csv_dir: str(path)
            Path to the directory where metadata CSV's are stored

        dest: hdfPath object
            Path string to location data should be written out

        verbose: boolean
            Set this flag to true to enable print statements throughout the
            code, including a runtime-until-completion estimate during the
            data reading loop.

        trange: [start_index, end_index]
            Time range IN INDICES over which to load the data. -1 in the
            second index will be translated to nti-1. Any two-element
            sequence is accepted.

    Returns
    -------
       dest (the hdfPath object that was written to)

    Raises
    ------
        hdftools.hdfGroupExists
            If dest.group is not '/' and already exists in the destination
            file.

        hdftools.hdfDatasetExists
            If the destination group already contains a 'data' dataset.
    """

    #Create a dictionary of attributes from the entire directory of CSV
    #files that applies to this probe and run
    attrs = csvtools.getAllAttrs(csv_dir, run, probe)

    #Check that some required keys are present, throw a fatal error if not
    req_keys = ['datafile', 'digitizer', 'adc']
    csvtools.missingKeys(attrs, req_keys, fatal_error=True)

    #Load some digitizer parameters we now know exist
    digitizer = attrs['digitizer'][0]
    adc = attrs['adc'][0]

    #TODO: Should this file take a data_dir and determine the filename
    #automatically, or should a source hdf file be given, leaving the program
    #that calls this one to determine the HDF file name?
    src = os.path.join(hdf_dir, attrs['datafile'][0] + '.hdf5')

    #Create an array of channels (required input for bapsflib read_data)
    # channel_arr = array of tuples of form:
    #               (digitizer, adc, board#, channel#)
    # eg. channel_arr = ('SIS crate', 'SIS 3305', 2, 1)
    #Do this in a loop, so the number of channels is flexible
    #However, the number of 'brd' and 'chan' fields MUST match
    #AND, the keys must be of the format 'brd1', 'chan1', etc.
    channel_arr = []
    nchan = 1
    while True:
        brdstr = 'brd' + str(int(nchan))
        chanstr = 'chan' + str(int(nchan))
        if brdstr in attrs.keys() and chanstr in attrs.keys():
            #Check to make sure channel has actual non-nan values
            if (not np.isnan(attrs[brdstr][0])
                    and not np.isnan(attrs[chanstr][0])):
                #Append the channel to the list to be extracted
                channel_arr.append(
                    (digitizer, adc, attrs[brdstr][0], attrs[chanstr][0]))
            #Always advance, so a NaN entry is skipped rather than retried
            nchan = nchan + 1
        else:
            break

    #Determine the number of channels from the channel array
    nchan = len(channel_arr)

    #Read some variables from the src file
    with bapsf_lapd.File(src, silent=True) as sf:
        src_digitizers = sf.digitizers

        #Assume this is the digitizer: it is the only one
        digi = src_digitizers['SIS crate']

        #Assume the adc, nti, etc. are all the same on all the channels.

        #This line assumes that only one configuration is being used
        #This is usually the case: if it is not, changes need to be made
        daq_config = digi.active_configs[0]

        name, info = digi.construct_dataset_name(channel_arr[0][2],
                                                 channel_arr[0][3],
                                                 adc=channel_arr[0][1],
                                                 config_name=daq_config,
                                                 return_info=True)

        #Read out some digitizer parameters
        nshots = info['nshotnum']
        nti = info['nt']

    #Translate the requested index range; the number of time points written
    #is eti - sti.
    sti = trange[0]
    if trange[1] == -1:
        eti = nti - 1
    else:
        eti = trange[1]

    nti = eti - sti

    #Check if keys are provided to specify a motion list
    # control = array of tuples of form (motion control, receptacle)
    # eg. controls = [('6K Compumotor', receptacle)]
    # note that 'receptacle' here is the receptacle NUMBER, 1 - indexed!)
    req_keys = ['motion_controller', 'motion_receptacle']
    if csvtools.missingKeys(attrs, req_keys, fatal_error=False):
        print("Some motion keys not found: positon data will not be read out!")
        controls, pos = None, None
    else:
        motion_controller = attrs['motion_controller'][0]
        motion_receptacle = attrs['motion_receptacle'][0]
        controls = [(motion_controller, motion_receptacle)]

        #Only read positions for the motion controllers this code supports.
        #For any other controller no position array is written.
        if motion_controller in ['6K Compumotor', 'NI_XZ', 'NI_XYZ']:
            pos, attrs = readPosArray(src, controls, attrs)
        else:
            controls, pos = None, None

    #Create the destination file directory if necessary
    hdftools.requireDirs(dest.file)

    #Create the destination file
    with h5py.File(dest.file, "a") as df:

        #Create the dest group, throw error if it exists
        #(string comparison must use !=, not identity)
        if dest.group != '/' and dest.group in df.keys():
            raise hdftools.hdfGroupExists(dest)
        #require_group creates the group when it is missing; plain indexing
        #would raise KeyError for any new non-root group.
        grp = df.require_group(dest.group)

        #Write the attrs dictionary into attributes of the new data group
        hdftools.writeAttrs(attrs, grp)

        #Open the LAPD file and copy the data over
        with bapsf_lapd.File(src, silent=True) as sf:

            #Initialize the output data array
            if 'data' in grp.keys():
                raise hdftools.hdfDatasetExists(str(dest) + ' -> ' + "'data'")

            #Create the dataset + associated attributes
            grp.require_dataset("data", (nshots, nti, nchan), np.float32,
                                chunks=(1, np.min([nti, 20000]), 1),
                                compression='gzip')
            grp['data'].attrs['unit'] = 'V'

            dimlabels = ['shots', 'time', 'chan']

            grp['data'].attrs['dimensions'] = [
                s.encode('utf-8') for s in dimlabels
            ]

            #Initialize time-remaining printout
            tr = util.timeRemaining(nchan * nshots)

            #Loop through the channels and shots, reading one-by-one into the
            #output dataset
            for chan in range(nchan):
                channel = channel_arr[chan]
                if verbose:
                    print("Reading channel: " + str(chan + 1) + '/'
                          + str(nchan))

                for shot in range(nshots):

                    if verbose:
                        tr.updateTimeRemaining(nshots * chan + shot)

                    #Read the data through bapsflib (shotnum is 1-indexed)
                    data = sf.read_data(channel[2], channel[3],
                                        digitizer=channel[0],
                                        adc=channel[1],
                                        config_name=daq_config,
                                        silent=True,
                                        shotnum=shot + 1)

                    grp['data'][shot, :, chan] = data['signal'][0, sti:eti]

                    if shot == 0:
                        #Adjusted in bapsflib for clock rate, avging, etc.
                        dt = data.dt

                        grp.attrs['dt'] = [
                            s.encode('utf-8')
                            for s in [str(dt.value), str(dt.unit)]
                        ]

            #If applicable, write the pos array to file
            if pos is not None:
                grp.require_dataset('pos', (nshots, 3), np.float32)[:] = pos
                del pos

            #Create the axes
            grp.require_dataset('shots', (nshots, ), np.float32,
                                chunks=True)[:] = np.arange(nshots)
            grp['shots'].attrs['unit'] = ''

            t = np.arange(nti) * dt
            grp.require_dataset('time', (nti, ), np.float32,
                                chunks=True)[:] = t.value
            grp['time'].attrs['unit'] = str(t.unit)

            grp.require_dataset('chan', (nchan, ), np.float32,
                                chunks=True)[:] = np.arange(nchan)
            grp['chan'].attrs['unit'] = ''

    return dest
def hrrToRaw(run, probe, hdf_dir, csv_dir, dest, verbose=False, debug=False):
    """
    Retrieves the appropriate metadata for a run and probe in a given data
    directory, then reads in the data from the HRR hdf5 output file.

    Parameters
    ----------
        run: int
            Run number

        probe: str
            Probe name

        hdf_dir: str (path)
            Path to the directory where HDF files are stored

        csv_dir: str(path)
            Path to the directory where metadata CSV's are stored

        dest: hdfPath object
            Path string to location data should be written out

        verbose: boolean
            Set this flag to true to enable print statements throughout the
            code, including a runtime-until-completion estimate during the
            data reading loop.

        debug: boolean
            Print the number of data and position channels found in the
            metadata.

    Returns
    -------
       dest (Filepath to destination file)

    Raises
    ------
        ValueError
            If the resource type of the first data channel is neither
            'SCOPE' nor 'MOTOR BOARD'.

        hdftools.hdfGroupExists
            If dest.group is not '/' and already exists in the destination
            file.

        hdftools.hdfDatasetExists
            If the destination group already contains a 'data' dataset.
    """

    #Create a dictionary of attributes from the entire directory of CSV
    #files that applies to this probe and run
    attrs = csvtools.getAllAttrs(csv_dir, run, probe)

    #Check that some required keys are present, throw a fatal error if not
    req_keys = ['datafile']
    csvtools.missingKeys(attrs, req_keys, fatal_error=True)

    #TODO: Should this file take a data_dir and determine the filename
    #automatically, or should a source hdf file be given, leaving the program
    #that calls this one to determine the HDF file name?
    src = os.path.join(hdf_dir, attrs['datafile'][0] + '.hdf5')

    #Create an array of channels
    #channel_arr = tuples of form (resource number, channel number)
    #Indexed from 1, to match load/LAPD.py
    channel_arr = []
    nchan = 1
    while True:
        digistr = 'resource' + str(int(nchan))
        chanstr = 'chan' + str(int(nchan))

        if chanstr in attrs.keys() and digistr in attrs.keys():
            #Check to make sure channel has actual non-nan values
            if (not np.isnan(attrs[digistr][0])
                    and not np.isnan(attrs[chanstr][0])):
                #Append the channel to the list to be extracted
                channel_arr.append((attrs[digistr][0], attrs[chanstr][0]))
            #Always advance, so a NaN entry is skipped rather than retried
            nchan = nchan + 1
        else:
            break

    if debug:
        print("{:.0f} Data Channels found in csv".format(len(channel_arr)))

    #Create a dictionary of position channels
    #pos_chan[axis] = (resource number, channel number), or None if the
    #axis is not configured
    ax = ['x', 'y', 'z']
    pos_chan = {}
    for i in range(3):
        digistr = ax[i] + 'pos_resource'
        chanstr = ax[i] + 'pos_chan'
        if chanstr in attrs.keys() and digistr in attrs.keys():
            #Check to make sure channel has actual non-nan values
            if (not np.isnan(attrs[digistr][0])
                    and not np.isnan(attrs[chanstr][0])):
                pos_chan[ax[i]] = (attrs[digistr][0], attrs[chanstr][0])
            else:
                pos_chan[ax[i]] = None
        else:
            pos_chan[ax[i]] = None

    if debug:
        #Count only the axes that are actually configured; the dictionary
        #itself always has three entries.
        npos_chan = sum(v is not None for v in pos_chan.values())
        print("{:.0f} Pos Channels found in csv".format(npos_chan))

    #Determine the number of channels from the channel array
    nchan = len(channel_arr)

    #Read some variables from the src file
    with h5py.File(src, 'r') as sf:

        digi_name = 'RESOURCE ' + str(channel_arr[0][0])
        print(digi_name)
        digigrp = sf[digi_name]

        resource_type = digigrp.attrs['RESOURCE TYPE'].decode('utf-8')

        attrs['RESOURCE ALIAS'] = (
            digigrp.attrs['RESOURCE ALIAS'].decode('utf-8'), '')
        attrs['RESOURCE DESCRIPTION'] = (
            digigrp.attrs['RESOURCE DESCRIPTION'].decode('utf-8'), '')
        attrs['RESOURCE ID'] = (digigrp.attrs['RESOURCE ID'], '')
        attrs['RESOURCE MODEL'] = (
            digigrp.attrs['RESOURCE MODEL'].decode('utf-8'), '')
        attrs['RESOURCE TYPE'] = (resource_type, '')
        resource_unit = digigrp['CHANNEL 0']['UNITS'][0].decode('utf-8')

        #Position data is converted from this unit to cm further below
        attrs['motion_unit'] = ('mm', '')

        if resource_type == 'SCOPE':
            dataname = 'TRACE'
            nshots = digigrp['CHANNEL 0'][dataname].shape[0]
            nti = digigrp['CHANNEL 0'][dataname].shape[1]
            dt = digigrp['CHANNEL 0'][dataname].attrs['WAVEFORM DT'] * u.s

            attrs['dt'] = [str(dt.value), str(dt.unit)]

        elif resource_type == 'MOTOR BOARD':
            dataname = 'POSITION'
            nshots = digigrp['CHANNEL 0'][dataname].shape[0]
            nti = 1

        else:
            #Without this, the undefined nshots/dataname would surface
            #later as an unrelated NameError.
            raise ValueError("Unsupported RESOURCE TYPE for " + digi_name
                             + ": '" + str(resource_type) + "'")

    #Create the destination file
    with h5py.File(dest.file, "a") as df:

        #Create the dest group, throw error if it exists
        #(string comparison must use !=, not identity)
        if dest.group != '/' and dest.group in df.keys():
            raise hdftools.hdfGroupExists(dest)
        #require_group creates the group when it is missing; plain indexing
        #would raise KeyError for any new non-root group.
        grp = df.require_group(dest.group)

        #Initialize the output data array
        if 'data' in grp.keys():
            raise hdftools.hdfDatasetExists(str(dest) + ' -> ' + "'data'")

        #Create the dataset + associated attributes
        grp.require_dataset("data", (nshots, nti, nchan), np.float32,
                            chunks=(1, np.min([nti, 20000]), 1),
                            compression='gzip')
        grp['data'].attrs['unit'] = resource_unit

        dimlabels = ['shots', 'time', 'chan']

        grp['data'].attrs['dimensions'] = [
            s.encode('utf-8') for s in dimlabels
        ]

        #Open the hdf5 file (read-only) and copy the data over
        with h5py.File(src, 'r') as sf:

            #Initialize time-remaining printout
            tr = util.timeRemaining(nchan * nshots)

            #Loop through the channels and shots, reading one-by-one into the
            #output dataset
            for chan in range(nchan):
                digi_name = 'RESOURCE ' + str(channel_arr[chan][0])
                chan_name = 'CHANNEL ' + str(channel_arr[chan][1])
                if verbose:
                    print("Reading channel: " + str(chan + 1) + '/'
                          + str(nchan))
                for shot in range(nshots):

                    if verbose:
                        tr.updateTimeRemaining(nshots * chan + shot)
                    #Read the data from the hdf5 file
                    grp['data'][shot, :, chan] = (
                        sf[digi_name][chan_name][dataname][shot, ...])

            #Write a position array if at least one axis is configured
            if (pos_chan['x'] is not None or pos_chan['y'] is not None
                    or pos_chan['z'] is not None):

                grp.require_dataset('pos', (nshots, 3), np.float32)
                ax = ['x', 'y', 'z']

                #Convert positions from the source motion unit to cm
                unit_factor = (1.0 * u.Unit(attrs['motion_unit'][0])).to(
                    u.cm).value
                attrs['motion_unit'] = ('cm', '')

                for i, a in enumerate(ax):
                    if pos_chan[a] is None:
                        #Axis not configured: fill with zeros
                        grp['pos'][:, i] = np.zeros(nshots)
                        continue

                    resname = 'RESOURCE ' + str(pos_chan[a][0])
                    channame = 'CHANNEL ' + str(int(pos_chan[a][1]))

                    try:
                        posdata = (sf[resname][channame]['POSITION'][:]
                                   * unit_factor)
                    except KeyError:
                        print("(!) POSITION Information not found for "
                              + resname)
                        print(
                            "If motion is not included in run, set resource to NA in csv"
                        )
                        #Treat the axis as unconfigured instead of falling
                        #through with an undefined (or stale) posdata.
                        grp['pos'][:, i] = np.zeros(nshots)
                        continue

                    #Handle the case where the multiple data points were
                    #taken at a position so npos!=nshots
                    npos = posdata.size
                    if npos != nshots:
                        posdata = np.repeat(posdata, int(nshots / npos))

                    grp['pos'][:, i] = posdata

        #Create the axes
        grp.require_dataset('shots', (nshots, ), np.float32,
                            chunks=True)[:] = np.arange(nshots)
        grp['shots'].attrs['unit'] = ''

        grp.require_dataset('chan', (nchan, ), np.float32,
                            chunks=True)[:] = np.arange(nchan)
        grp['chan'].attrs['unit'] = ''

        #A time axis only exists for scope traces
        if resource_type == 'SCOPE':
            t = np.arange(nti) * dt
            grp.require_dataset('time', (nti, ), np.float32,
                                chunks=True)[:] = t.value
            grp['time'].attrs['unit'] = str(t.unit)

        #Write the attrs dictionary into attributes of the new data group
        hdftools.writeAttrs(attrs, grp)

    return dest