Exemplo n.º 1
0
def calcT0ind(srcgrp, verbose=False):
    """Estimate the t0 index of every shot from channel 0 of the data array.

    For each shot the maximum of channel 0 is located, and t0 is taken to be
    the index of the largest gradient (steepest rise) in the part of the
    trace that precedes that maximum.

    Parameters
    ----------
        srcgrp: mapping (e.g. an open HDF5 group)
            Must contain a 'data' array with shape [nshots, nti, nchan].

        verbose: boolean
            If True, print progress information.

    Returns
    -------
        Integer numpy array of length nshots holding one t0 index per shot.
        Shots for which no index can be computed (fewer than two samples
        before the maximum) are assigned 0.

    Raises
    ------
        KeyError
            If srcgrp has no 'data' entry.
    """
    try:
        nshots, _nti, _nchan = srcgrp['data'].shape
    except KeyError as err:
        raise KeyError(
            "tdiode.calcT0ind requires the data array to have an attribute 'shape'!"
        ) from err

    t0ind_array = np.zeros([nshots])

    #Only build the progress tracker when it is actually going to be used
    tr = None
    if verbose:
        tr = util.timeRemaining(nshots)
        print("Calculating t0 indices")

    for i in range(nshots):
        #Update time remaining
        if verbose:
            tr.updateTimeRemaining(i)

        #Read the trace from the dataset once and reuse it for both steps
        trace = srcgrp['data'][i, :, 0]
        maxind = np.argmax(trace)
        try:
            t0ind_array[i] = np.argmax(np.gradient(trace[0:maxind]))
        except ValueError:
            #This error can be thrown on a bad shot (too few points before
            #the maximum): shot will eventually be ignored, so value written
            #here doesn't ultimately matter.
            t0ind_array[i] = 0

    return t0ind_array.astype(int)
Exemplo n.º 2
0
def findBadShots(srcgrp,
                 t0indarr,
                 verbose=False,
                 badshotratio=None,
                 fatal_badshot_percentage=None):
    """Sort shots into 'bad' and 'good' based on how distinct the signal is.

    For every shot, channel 0 has its median subtracted. The ratio between
    the maximum of that trace and the standard deviation of the first 75%
    of the samples before t0 is then compared to badshotratio.

    Parameters
    ----------
        srcgrp: mapping (e.g. an open HDF5 group)
            Must contain a 'data' array with shape [nshots, nti, nchan].

        t0indarr: sequence of numbers
            t0 index for each shot.

        verbose: boolean
            If True, print progress information.

        badshotratio: float
            Shots whose max/stdev ratio is below this value are bad.
            Default is 10.

        fatal_badshot_percentage: float
            Despite the name this is a fraction (bad shots / total shots).
            If it is exceeded a ValueError is raised. Default is 0.2.

    Returns
    -------
        (badshots_arr, goodshots_arr): two lists of shot indices.

    Raises
    ------
        KeyError
            If srcgrp has no 'data' entry.
        ValueError
            If the fraction of bad shots exceeds fatal_badshot_percentage.
    """
    try:
        nshots, _nti, _nchan = srcgrp['data'].shape
    except KeyError as err:
        raise KeyError(
            "tdiode.findBadShots requires the data array to have an attribute 'shape'!"
        ) from err

    if badshotratio is None:
        badshotratio = 10

    if fatal_badshot_percentage is None:
        fatal_badshot_percentage = 0.2

    goodshots_arr = []
    badshots_arr = []

    #Only build the progress tracker when it is actually going to be used
    tr = None
    if verbose:
        tr = util.timeRemaining(nshots)
        print("Identifying bad shots")

    for i in range(nshots):
        #Update time remaining
        if verbose:
            tr.updateTimeRemaining(i)
        #TODO: trying using the mean of the last 500 points rather than the median as the reference
        pret0 = int(t0indarr[i] * 0.75)

        #Read the trace once, then remove the offset
        trace = srcgrp['data'][i, :, 0]
        arr = trace - np.median(trace)

        #Ratio is between the maximum value and the stdev of the early stuff
        early = arr[0:pret0]
        if early.size == 0:
            #No pre-t0 samples to compare against (e.g. t0 index of 0):
            #the shot cannot be validated
            max_median_ratio = np.nan
        else:
            #A perfectly flat early window gives inf (clear signal) or
            #nan (flat everywhere); both are handled below, so silence
            #the numpy warnings
            with np.errstate(divide='ignore', invalid='ignore'):
                max_median_ratio = np.max(arr) / np.std(early)

        #This defines a 'bad shot' where the laser diode was indistinct,
        #indicating a possible misfire. A nan ratio compares False against
        #everything, so it must be tested explicitly or the shot would be
        #silently accepted as good.
        if np.isnan(max_median_ratio) or max_median_ratio < badshotratio:
            badshots_arr.append(i)
        else:
            goodshots_arr.append(i)

    print("Found " + str(len(badshots_arr)) + ' bad shots')

    #Guard against dividing by zero when there are no shots at all
    if nshots > 0 and len(badshots_arr) / nshots > fatal_badshot_percentage:
        raise ValueError("Lots of bad shots found! Bad sign! Aborting.")

    return badshots_arr, goodshots_arr
Exemplo n.º 3
0
def scopeRawToFull(src,
                   dest,
                   port=14,
                   tdiode_hdf=None,
                   verbose=False,
                   debug=False,
                   vdist=False):
    """ Copy raw scope traces into a processed HDF5 group, optionally
        shifting each shot so that the timing-diode t0 indices line up.

    Parameters
    ----------
        src: hdfPath object
            Path to the raw hdf5 group. The group must contain the datasets
            'data' (shape [nshots, nti, nchan]), 'time' and 'shots'.

        dest: hdfPath object
            Location the processed data should be written out. An error is
            raised if the group or its 'data' dataset already exists.

        port: float
            port at which the probe is located. Currently unused.

        tdiode_hdf:  hdfPath object
            Path to an hdf5 group containing processed tdiode data (datasets
            't0indarr' and 'goodshots' plus a 'dt' attribute). If no HDF
            file is provided, no timing correction will be applied.

        verbose: boolean
            If True, print progress information.

        debug: boolean
            If True, print the index range read for every shot.

        vdist: boolean
            Currently unused.

    Returns
    -------
       True (if executes to the end)
    """

    # ******
    # Load data from the raw HDF file
    # ******
    with h5py.File(src.file, 'r') as sf:

        #Get the datagroup
        srcgrp = sf[src.group]

        #Create dictionary of attributes
        attrs = hdftools.readAttrs(srcgrp)

        #Check for keys always required by this function
        req_keys = []

        #Process the required keys, throwing an error if any cannot be found
        csvtools.missingKeys(attrs, req_keys, fatal_error=True)

        #Extract the shape of the source data
        nshots, nti, _nchan = srcgrp['data'].shape

        if verbose:
            print("Opening destination HDF file")

        #Create the destination file directory if necessary
        hdftools.requireDirs(dest.file)

        #Open the destination file
        #This exists WITHIN the open statement for the source file, so the
        #source file is open at the same time.
        with h5py.File(dest.file, 'a') as df:

            #Throw an error if this group already exists
            #(compare by value: 'is not' on a string literal tests identity)
            if dest.group != '/' and dest.group in df.keys():
                raise hdftools.hdfGroupExists(dest)

            destgrp = df.require_group(dest.group)

            #Copy over attributes
            hdftools.copyAttrs(srcgrp, destgrp)

            #Load the time vector into memory
            t = srcgrp['time'][:]

            #If tdiode_hdf is set, load the pre-processed tdiode data
            if tdiode_hdf is not None:
                if verbose:
                    print("Loading tdiode array from file.")
                #Use a separate name so the source file handle 'sf' is not
                #shadowed by the tdiode file
                with h5py.File(tdiode_hdf.file, 'r') as tf:
                    grp = tf[tdiode_hdf.group]
                    t0indarr = grp['t0indarr'][:]
                    goodshots = grp['goodshots'][:]
                    tdiode_attrs = hdftools.readAttrs(grp)

                #If tdiode was digitized with a different dt, this correction
                #will be necessary
                dt_ratio = float(attrs['dt'][0]) / float(tdiode_attrs['dt'][0])
                t0indarr = (t0indarr / dt_ratio).astype(np.int32)

                #We will remove up to max_t0shift indices from each array such that
                #the t0 indices all line up.
                min_t0ind = np.min(t0indarr[goodshots])
                max_t0shift = np.max(t0indarr[goodshots]) - min_t0ind
                #Compute new nti
                nti = nti - max_t0shift

                t = t[0:nti] - t[min_t0ind]

            #Throw an error if this dataset already exists
            if 'data' in destgrp.keys():
                raise hdftools.hdfDatasetExists(str(dest) + ' -> ' + "'data'")

            if verbose:
                print("Creating 'data' group in destination file")

            destgrp.require_dataset('data', (nshots, nti),
                                    np.float32,
                                    chunks=(1, int(min(nti, 20000))),
                                    compression='gzip')

            #Initialize time-remaining printout
            tr = util.timeRemaining(nshots)

            if verbose:
                print("Beginning processing data shot-by-shot.")

            #Chunking data processing loop limits memory usage
            for i in range(nshots):

                #Update time remaining
                if verbose:
                    tr.updateTimeRemaining(i)

                #If a tdiode hdf was supplied, calculate the index correction
                #here
                if tdiode_hdf is not None:
                    #Calculate the starting and ending indices for the data.
                    #min/max were computed from the good shots only, so a
                    #bad shot can fall outside [0, max_t0shift]; clamp it so
                    #the slice keeps exactly nti points instead of producing
                    #a wrong-length array that cannot be written.
                    ta = int(np.clip(t0indarr[i] - min_t0ind, 0, max_t0shift))
                    tb = ta + nti

                else:
                    #By default, read in the entire dataset
                    ta = None
                    tb = None

                if debug:
                    print("Data range: [" + str(ta) + "," + str(tb) + "]")

                #Read in the data from the source file
                #(squeeze drops the channel axis; this only matches the 2D
                #destination when there is a single channel)
                signal = np.squeeze(srcgrp['data'][i, ta:tb])

                destgrp['data'][i, :] = signal

            destgrp['data'].attrs['unit'] = ''

            if 'pos' in srcgrp:
                destgrp.copy(srcgrp['pos'], 'pos')

            destgrp.require_dataset('shots', (nshots, ), np.int32,
                                    chunks=True)[:] = srcgrp['shots'][:]
            destgrp['shots'].attrs['unit'] = srcgrp['shots'].attrs['unit']

            dimlabels = ['shots', 'time']
            destgrp.require_dataset('time', (nti, ), np.float32,
                                    chunks=True)[:] = t
            destgrp['time'].attrs['unit'] = 's'

            destgrp['data'].attrs['dimensions'] = [
                s.encode('utf-8') for s in dimlabels
            ]

            if verbose:
                print("End of Monochromator routine!")

            return True
Exemplo n.º 4
0
def chunked_array_op(src,
                     dest,
                     ax,
                     op,
                     newshape,
                     delsrc=False,
                     verbose=False,
                     **args):
    """
    Apply one of the array functions to an entire dataset, breaking the
    dataset up into chunks to keep memory load low.

    src -> Source dataset (hdfpath object)
    dest -> Destination dataset path (hdfpath object). The destination file
    is opened in 'w' mode, so any existing file at that path is overwritten.
    ax -> Axis to operate on, in whatever form getAxInd accepts
    op -> Function to be applied. It is called as
    op(src_dataset, dest_dataset, slices, axis_index, args), where args is
    the dictionary of extra keyword arguments given to this function
    newshape -> Shape the new dataset will be after op has been applied
    delsrc -> Boolean, if true src file will be deleted after operation
    verbose -> Boolean, if true activates printouts
    """

    with h5py.File(src.file, 'r') as sf:
        srcgrp = sf[src.group]

        #Check source is valid dataset
        validDataset(srcgrp)

        #Load information about source dataset
        srcdata = srcgrp['data']
        oldshape = list(srcdata.shape)
        ndim = len(oldshape)
        dimlabels = hdftools.arrToStrList(srcdata.attrs['dimensions'][:])

        #Get ax index, and the label of that axis (the name of its dataset)
        axind = getAxInd(ax, dimlabels)
        axname = dimlabels[axind]

        #Decide on a chunking axis
        #A contiguous (unchunked) dataset reports chunks=None; fall back to
        #the full shape in that case
        layout = srcdata.chunks
        if layout is None:
            layout = tuple(oldshape)

        #Get a list of the axes indices ordered by chunk size, largest to smallest
        chunks = np.flip(np.argsort(layout))

        #Chose the largest one that ISN'T the chosen axis
        chunkax = chunks[0]
        if chunkax == axind:
            chunkax = chunks[1]
        print("Chunking axis: " + str(dimlabels[chunkax]))

        if layout[chunkax] < 2:
            print("WARNING: POSSIBLE INEFFICENT CHUNKING DETECTED!")

        #Determine optimal chunksize (along chunkax)
        ideal_chunk_elms = 1e7  #1e7*4 bytes (per float32) ~ 40mb, which is good
        #number of values per chunk ax value
        #(np.prod: the np.product alias no longer exists in NumPy 2.0)
        nper = np.prod(oldshape) / oldshape[chunkax]

        chunksize = max(1, int(np.round(ideal_chunk_elms / nper)))

        #Determine nchunks
        nchunks = int(np.ceil(oldshape[chunkax] / chunksize))

        #Create the destination dataset
        with h5py.File(dest.file, 'w') as df:
            #The file was just truncated, so the group has to be created
            destgrp = df.require_group(dest.group)

            #Copy all the dataset attributes
            hdftools.copyAttrs(srcgrp, destgrp)

            #Create new data array
            destgrp.require_dataset('data',
                                    newshape,
                                    np.float32,
                                    chunks=True,
                                    compression='gzip')
            destgrp['data'].attrs['unit'] = srcdata.attrs['unit']

            if verbose:
                print(srcdata.shape)
                print(destgrp['data'].shape)

            #Copy the axes over, except the one being operated on
            #That axis will be copied over later, with changes
            for axis in dimlabels:
                if axis != axname:
                    srcgrp.copy(axis, destgrp)

            #Create the axis being operated on...unless it is now trivial
            #Newshape is supplied by the caller, and is specific to the op
            axis_kept = newshape[axind] > 1
            if axis_kept:
                destgrp.require_dataset(axname, (newshape[axind], ),
                                        np.float32,
                                        chunks=True)
                destgrp[axname].attrs['unit'] = srcgrp[axname].attrs['unit']
                new_dimlabels = list(dimlabels)  #No changes
            else:
                #Trivial: remove this dim. (list.pop returns the removed
                #element, not the remaining list, so build the list directly)
                new_dimlabels = [
                    label for k, label in enumerate(dimlabels) if k != axind
                ]

            destgrp['data'].attrs['dimensions'] = hdftools.strListToArr(
                new_dimlabels)

            #Initialize time-remaining printout
            #Chunks are big, so report more often than usual
            tr = util.timeRemaining(nchunks, reportevery=1)

            for i in range(nchunks):
                #Update time remaining
                if verbose:
                    tr.updateTimeRemaining(i)
                sl = [slice(None)] * ndim

                #Assemble the chunk slices; the last chunk runs to the end
                if i != nchunks - 1:
                    sl[chunkax] = slice(i * chunksize, (i + 1) * chunksize,
                                        None)
                else:
                    sl[chunkax] = slice(i * chunksize, None, None)

                #Apply op to the chunk
                op(srcdata, destgrp['data'], sl, axind, args)

            #Make the new axis by applying op to the old axis. Only possible
            #when the axis dataset was actually created above.
            if axis_kept:
                op(srcgrp[axname], destgrp[axname], [slice(None)], 0, args)

    #If requested, delete the source file
    if delsrc:
        os.remove(src.file)
Exemplo n.º 5
0
def imgDirToRaw(run, probe, img_dir, dest, csv_dir, verbose=False):
    """Pack a directory of image files into a single raw HDF5 dataset.

    Parameters
    ----------
        run, probe:
            Passed to csvtools.getAllAttrs to look up the metadata for this
            dataset. The metadata must contain the key 'run_folder'.

        img_dir: str
            Directory that contains the run folder.

        dest: hdfPath object
            Destination file/group. Any existing file at dest.file is
            deleted first.

        csv_dir: str
            Directory containing the metadata csv files.

        verbose: boolean
            Currently unused; the progress printout is always active.

    Returns
    -------
        dest

    The output 'data' array has dimensions [frames, xpixels, ypixels, chan].
    All images are assumed to have the same size and bands as the first one.
    """

    #Import attributes for this run/probe
    attrs = csvtools.getAllAttrs(csv_dir, run, probe)

    #Check for keys always required by this function
    req_keys = ['run_folder']
    csvtools.missingKeys(attrs, req_keys, fatal_error=True)

    run_folder = attrs['run_folder'][0]
    src = os.path.join(img_dir, run_folder)

    #Go through the directory and find all the image files
    imgfiles = []
    for root, _dirs, files in os.walk(src):
        for fname in files:
            #Exclude files beginning in .
            if fname.startswith('.'):
                continue
            #Join with root (not src) so files in subfolders get valid paths
            imgfiles.append(os.path.join(root, fname))

    #Natural-sort the images by filename
    imgfiles = natural_sort(imgfiles)

    nframes = len(imgfiles)

    #Fail before touching the destination if there is nothing to convert
    if nframes == 0:
        raise FileNotFoundError("No image files found in: " + str(src))

    #remove files if they already exist
    if os.path.exists(dest.file):
        os.remove(dest.file)

    #Create the destination file
    with h5py.File(dest.file, "a") as df:

        #Assume all images are the same shape, load the first one to figure
        #out the array dimensions
        with PIL.Image.open(imgfiles[0]) as first:
            #PIL reports size as (width, height)
            nxpx, nypx = first.size
            #Bands will include the names of the different channels
            nchan = len(first.getbands())

        #Create the dest group, throw error if it exists
        if dest.group != '/' and dest.group in df.keys():
            raise hdftools.hdfGroupExists(dest)
        #The file is new, so the group must be created rather than looked up
        grp = df.require_group(dest.group)

        #Initialize the output data array
        if 'data' in grp.keys():
            raise hdftools.hdfDatasetExists(str(dest) + ' -> ' + "'data'")

        #Create the dataset + associated attributes
        grp.require_dataset("data", (nframes, nxpx, nypx, nchan),
                            np.float32,
                            chunks=(1, nxpx, nypx, 1),
                            compression='gzip')
        grp['data'].attrs['unit'] = ''

        #Initialize time-remaining printout
        tr = util.timeRemaining(nframes, reportevery=5)

        #Actually put the images into the file
        for i, f in enumerate(imgfiles):
            tr.updateTimeRemaining(i)
            with PIL.Image.open(f) as im:
                frame = np.array(im)

            #PIL arrays are laid out (rows, cols[, bands]), i.e.
            #(nypx, nxpx[, nchan]). Add the band axis if it is missing.
            frame = frame.reshape(frame.shape[0], frame.shape[1], -1)

            #Rotate images: rotating in the plane of the first two axes
            #turns (nypx, nxpx, nchan) into (nxpx, nypx, nchan), which also
            #works for non-square images
            frame = np.rot90(frame, k=3, axes=(0, 1))

            if frame.shape != (nxpx, nypx, nchan):
                raise ValueError("Image " + str(f) + " has shape " +
                                 str(frame.shape) + ", expected " +
                                 str((nxpx, nypx, nchan)))

            grp['data'][i, :, :, :] = frame

        dimlabels = ['frames', 'xpixels', 'ypixels', 'chan']
        grp['data'].attrs['dimensions'] = [
            s.encode('utf-8') for s in dimlabels
        ]

        #Write the attrs dictionary into attributes of the new data group
        hdftools.writeAttrs(attrs, grp)

        #Create the axes
        grp.require_dataset('frames', (nframes, ), np.float32,
                            chunks=True)[:] = np.arange(nframes)
        grp['frames'].attrs['unit'] = ''

        grp.require_dataset('xpixels', (nxpx, ), np.float32,
                            chunks=True)[:] = np.arange(nxpx)
        grp['xpixels'].attrs['unit'] = ''

        grp.require_dataset('ypixels', (nypx, ), np.float32,
                            chunks=True)[:] = np.arange(nypx)
        grp['ypixels'].attrs['unit'] = ''

        grp.require_dataset('chan', (nchan, ), np.float32,
                            chunks=True)[:] = np.arange(nchan)
        grp['chan'].attrs['unit'] = ''

    return dest
Exemplo n.º 6
0
def fullToBmag(src, dest, verbose=False):
    """Compute the field magnitude sqrt(bx^2 + by^2 + bz^2) of a dataset.

    Parameters
    ----------
        src: hdfPath object
            Source group. Its 'data' array must be labeled with the
            dimensions 'time', 'xaxis', 'yaxis' and 'zaxis', and the group
            must contain the datasets 'xaxis', 'yaxis' and 'zaxis'. The
            code indexes the first axis in chunks and takes components
            0, 1, 2 from the last axis.

        dest: hdfPath object
            Destination file/group. Any existing file at dest.file is
            deleted first.

        verbose: boolean
            If True, print progress information.

    Returns
    -------
        dest

    Raises
    ------
        KeyError
            If the required attributes, dimensions or axes are missing.
    """
    with h5py.File(src.file, 'r') as sf:
        srcgrp = sf[src.group]
        try:
            dimlabels = hdftools.arrToStrList(
                srcgrp['data'].attrs['dimensions'][:])
            #Same as the old shape, but with the last (channel) dimension
            #reduced to length one
            shape = tuple(srcgrp['data'].shape[:-1]) + (1, )
        except KeyError as err:
            raise KeyError("bdot.fullToBmag requires the data array to have an attribute 'dimensions' and 'shape'") from err

        #We will duplicate the chunking on the new array, clipped so that no
        #chunk is larger than the (reduced) output shape
        chunks = srcgrp['data'].chunks
        if chunks is not None:
            chunks = tuple(min(c, s) for c, s in zip(chunks, shape))

        #Verify the required dimensions and axes exist. A missing label
        #makes list.index raise ValueError (not KeyError), so test
        #membership explicitly.
        required = ("time", "xaxis", "yaxis", "zaxis")
        labels_ok = all(label in dimlabels for label in required)
        axes_ok = all(label in srcgrp for label in required[1:])
        if not (labels_ok and axes_ok):
            raise KeyError("bdot.fullToBmag requires dimensions 'time', 'xaxis', 'yaxis', 'zaxis'")

        #Create the destination file directory if necessary
        hdftools.requireDirs(dest.file)

        #Delete destination file if it already exists
        if os.path.exists(dest.file):
            os.remove(dest.file)

        with h5py.File(dest.file, 'w') as df:
            #The file is new, so the group must be created rather than
            #looked up
            destgrp = df.require_group(dest.group)

            destgrp.require_dataset('data', shape, np.float32, chunks=chunks, compression='gzip')
            destgrp['data'].attrs['unit'] = 'G'
            destgrp['data'].attrs['dimensions'] = hdftools.strListToArr(dimlabels)

            #Copy the axes over; the channel axis is replaced by a single
            #entry
            for ax in dimlabels:
                if ax != 'chan':
                    srcgrp.copy(ax, destgrp)
                else:
                    destgrp.require_dataset('chan', (1,), np.int32, chunks=True)[:] = [0]
                    destgrp['chan'].attrs['unit'] = ''

            #The loop below slices the FIRST axis, so the number of chunks
            #has to be derived from the length of that axis
            chunksize = 100
            nchunks = int(np.ceil(shape[0]/chunksize))

            #Initialize time-remaining printout
            tr = util.timeRemaining(nchunks, reportevery=10)

            for i in range(nchunks):
                #Update time remaining
                if verbose:
                    tr.updateTimeRemaining(i)

                #The last chunk runs to the end of the array
                a = i*chunksize
                b = None if i == nchunks-1 else (i+1)*chunksize

                bx = srcgrp['data'][a:b, ..., 0]
                by = srcgrp['data'][a:b, ..., 1]
                bz = srcgrp['data'][a:b, ..., 2]

                destgrp['data'][a:b, ..., 0] = np.sqrt(np.power(bx, 2) +
                                                       np.power(by, 2) +
                                                       np.power(bz, 2))

        return dest
Exemplo n.º 7
0
def fullToCurrent(src, dest, verbose=False):
    """Compute a current density dataset from the curl of the source data.

    The source array is processed in chunks along its first axis; each chunk
    is passed to math.curl and scaled by 7.138 before being written out with
    unit 'A/cm^2'.
    NOTE(review): 'math' must be a project module providing curl() (the
    standard library math module has no such function) - confirm the import.

    Parameters
    ----------
        src: hdfPath object
            Source group. Its 'data' array must be labeled with the
            dimensions 'time', 'xaxis', 'yaxis' and 'zaxis', and the group
            must contain the datasets 'xaxis', 'yaxis' and 'zaxis'.

        dest: hdfPath object
            Destination file/group. Any existing file at dest.file is
            deleted first.

        verbose: boolean
            If True, print progress information.

    Returns
    -------
        dest

    Raises
    ------
        KeyError
            If the required attributes, dimensions or axes are missing.
    """
    with h5py.File(src.file, 'r') as sf:
        srcgrp = sf[src.group]
        try:
            dimlabels = hdftools.arrToStrList(
                srcgrp['data'].attrs['dimensions'][:])
            shape = srcgrp['data'].shape
        except KeyError as err:
            raise KeyError("bdot.fullToCurrent requires the data array to have an attribute 'dimensions' and 'shape'") from err

        #We will duplicate the chunking on the new array
        chunks = srcgrp['data'].chunks

        try:
            xax = dimlabels.index("xaxis")
            yax = dimlabels.index("yaxis")
            zax = dimlabels.index("zaxis")

            xaxis = srcgrp['xaxis']
            yaxis = srcgrp['yaxis']
            zaxis = srcgrp['zaxis']

            nti = shape[dimlabels.index("time")]
        except (KeyError, ValueError) as err:
            #list.index raises ValueError for a missing label, while a
            #missing axis dataset raises KeyError; report both the same way
            raise KeyError("bdot.fullToCurrent requires dimensions 'time', 'xaxis', 'yaxis', 'zaxis'") from err

        if nti > 10000:
            print("WARNING: NTI IS LARGE! CURRENT CALCULATION WILL TAKE A VERY LONG TIME!")
            print("If you have better things to do with your CPU hours, try thinning the data first.")

        #Create the destination file directory if necessary
        hdftools.requireDirs(dest.file)

        #Delete destination file if it already exists
        if os.path.exists(dest.file):
            os.remove(dest.file)

        with h5py.File(dest.file, 'w') as df:
            #The file is new, so the group must be created rather than
            #looked up
            destgrp = df.require_group(dest.group)

            destgrp.require_dataset('data', shape, np.float32, chunks=chunks, compression='gzip')
            destgrp['data'].attrs['unit'] = 'A/cm^2'
            destgrp['data'].attrs['dimensions'] = hdftools.strListToArr(dimlabels)

            #Copy the axes over
            for ax in dimlabels:
                srcgrp.copy(ax, destgrp)

            #The loop below slices the FIRST axis, so the number of chunks
            #has to be derived from the length of that axis
            chunksize = 100
            nchunks = int(np.ceil(shape[0]/chunksize))

            #Initialize time-remaining printout
            tr = util.timeRemaining(nchunks, reportevery=10)

            for i in range(nchunks):
                #Update time remaining
                if verbose:
                    tr.updateTimeRemaining(i)

                #The last chunk runs to the end of the array
                a = i*chunksize
                b = None if i == nchunks-1 else (i+1)*chunksize

                #Constant is (c/4pi) * (conversion CGS -> A/m^2)*(conversion A/m^2 -> A/cm^2)
                #(2.99e10/4pi)*(3.0e-5)*(1e-4)
                #3e-5 is from the NRL formulary
                destgrp['data'][a:b, ...] = (7.138)*math.curl(
                    srcgrp['data'][a:b, ...],
                    xax, yax, zax, xaxis, yaxis, zaxis)

        return dest
Exemplo n.º 8
0
def bdotRawToFull(src, dest, 
                  tdiode_hdf=None, grid=False, integrate=True, 
                  calibrate =True, highfreq_calibrate=True,
                  angle_correction = True, remove_offset = True,
                  replace_badshots = True,
                  verbose=False, debug = False,
                  offset_range=(0,100), offset_rel_t0 = (False, False), 
                  grid_precision=0.1, strict_grid=False, strict_axes = False):
    """ Integrates bdot data, calibrates output using information about the probe.
        Corrects for probe angle based on which drive is being used.
    Parameters
    ----------
        src: hdfPath object
            Path string to a raw hdf5 file containing bdot data
            
        dest: hdfPath object
            Path string to location processed bdot data should be written out
        tdiode_hdf:  hdfPath object
            Path to a raw hdf5 file containing tdiode data. If no HDF file is
            provided, no timing correction will be applied.
            
        grid: Boolean
            If grid is true, output will be written in cartesian grid array
            format, eg. [nti, nx, ny, nz, nreps, nchan]. Otherwise, output will
            be in [nshots, nti, nchan] format
            
            
        integrate: Boolean
             If True, integrate the bdot data (usually you want to do this).
             Default is True
             
        calibrate: Boolean
             If True, calculate and apply ANY calibration factors 
             to the data. Default is True.
             
        highfreq_calibrate: Boolean
             If True, calculate and apply the high frequency calibration 
             factors to the data. Default is True. If the 'tau' variables are
             not specified in the probe metadata, the HF calibration won't be
             applied regardless of this keyword.
             
        angle_correction: Boolean
             If True, apply any angular correction between axes that is
             required based on the motion_format keyword in the metadata. If 
             false, no correction is applied regardless of the metadata.
             Default is True.
             
       remove_offset: Boolean
            If True, remove an offset from the data based on the offset_range
            specified in those keywords. If False, data will remain as-is. 
            Default is True.
            
        replace_badshots: Boolean
            If True, semi-intelligently replace bad shots with neighboring
            good shots. If False, data remains as-is.
            Default is True.
            
        offset_range: tuple
            Tuple of indices between which the average of the signal will be
            computed and subtracted from the entire signal to correct for
            offset. This should be a segment with just noise, ideally at the
            very beginning of the dataset. Longer is better. 
            Default is (0,100)
            
        offset_rel_t0: Tuple of booleans
            If either of these values is set to True, the coorresponding
            offset_range value will be taken to be relative to the t0 index
            for that each shot. For example, if t0=2000 for a shot, 
            offset_range=(10, -100), and offset_rel_t0 = (False, True), then
            the offset will be computed over the range (10, 1900)
            
            
        grid_precision: float
            This is the precision to which position values will be rounded
            before being fit onto the grid. Only applies to fuzzy axis and grid
            creation.
            
        strict_axes: boolean
            If true, attempt to calculate axes from saved grid parameters.
            Default is false, which attempts to calculate axes by looking at
            position values.
            
        strict_grid: boolean
            If true, strictly unravel data onto the axes, assuming the probe
            moved in order reps->X->Y->Z. This will NOT correctly handle
            points where the probe was not at the requested position. Default
            is false, which applys "fuzzy gridding", which tries to find the
            best grid position for each shot individually.
    Returns
    -------
       True (if executes to the end)
    """ 

    # ******
    # Load data from the raw HDF file
    # ******
    with h5py.File(src.file, 'r') as sf:
         
        #Get the datagroup
        srcgrp = sf[src.group]
        
        #Create dictionary of attributes
        attrs = hdftools.readAttrs(srcgrp)
        
        #Check for keys always required by this function
        req_keys = ['xarea', 'yarea', 'zarea',
                    'xatten', 'yatten', 'zatten', 'gain',
                    'xpol', 'ypol', 'zpol', 'roll', 
                    'probe_origin_x', 'probe_origin_y', 'probe_origin_z',
                    'dt', 'nturns']
       


        if  'pos' in srcgrp:
            pos = srcgrp['pos'][:] #Read the entire array in
            #If pos array exists, there are keywords required for that too.
            motion_format = attrs['motion_format'][0]
            if motion_format == 'fixed_pivot' and angle_correction:
                req_keys = req_keys + ['rot_center_x', 'rot_center_y', 'rot_center_z']
            elif motion_format == 'cartesian' and angle_correction:
                pass
            elif not angle_correction:
                pass
            else:
                raise ValueError("Motion format unrecognized: " + str(attrs['motion_format'][0]) )
            
        else:
            #If no position information is given, a single explicit position
            #is required. 
            req_keys = req_keys + ['xpos', 'ypos', 'zpos']
            grid = False #Can't grid data if there's no pos array!
            motion_format = None
            
        #Process the required keys, throwing an error if any cannot be found
        csvtools.missingKeys(attrs, req_keys, fatal_error=True)
        

        #Extract the shape of the source data
        nshots, nti, nchan = srcgrp['data'].shape
        
        #If requested by keyword, apply gridding
        if grid:
           shotgridind, xaxis, yaxis, zaxis, nx, ny, nz, nreps, nshots = postools.grid(
                     pos, attrs, strict_axes=strict_axes, 
                     strict_grid=strict_grid, grid_precision=grid_precision, 
                     invert=False)
           


            
        if verbose:
            print("Opening destination HDF file")
        
        #Create the destination file directory if necessary
        hdftools.requireDirs(dest.file)

        #Open the destination file
        #This exists WITHIN the open statement for the source file, so the
        #source file is open at the same time.
        with h5py.File(dest.file, 'a') as df:
            
            #Throw an error if this group already exists
            if dest.group is not '/' and dest.group in df.keys():
                raise hdftools.hdfGroupExists(dest)
            
            destgrp = df.require_group(dest.group)
            
            

            
            #Copy over attributes
            hdftools.copyAttrs(srcgrp, destgrp)
        
            #Load the time vector
            t = srcgrp['time']
            
    
            #If a timing diode is being applied, correct the time vector here.
            if tdiode_hdf is not None:
                if verbose:
                    print("Loading tdiode array from file.")
                with h5py.File(tdiode_hdf.file, 'r') as sf:
                    grp = sf[tdiode_hdf.group]
                    t0indarr = grp['t0indarr'][:]
                    goodshots = grp['goodshots'][:]
                    badshots = grp['badshots'][:]
                    tdiode_attrs = hdftools.readAttrs(grp)
                    
                #If tdiode was digitized with a different dt, this correction
                #will be necessary
                dt_ratio = float(attrs['dt'][0])/float(tdiode_attrs['dt'][0])
                t0indarr = (t0indarr/dt_ratio).astype(np.int32)
                    
                #We will remove up to max_t0shift indices from each array such that
                #the t0 indices all line up.
                min_t0ind = np.min(t0indarr[goodshots])
                max_t0shift = np.max(t0indarr[goodshots]) - min_t0ind
                #Compute new nti
                nti = nti - max_t0shift 
                
        
                t = t[0:nti] - t[min_t0ind]

        


            #Throw an error if this dataset already exists
            if 'data' in destgrp.keys():
                    raise hdftools.hdfDatasetExists(str(dest) + ' -> ' + "'data'")
                    
            #Create the dataset 'data' appropriate to whether or not output
            #data will be gridded
            if verbose:
                print("Creating 'data' group in destination file")
            if grid:
                destgrp.require_dataset('data', (nti, nx, ny, nz, nreps, nchan), np.float32, chunks=(np.min([nti, 20000]),1,1,1,1,1), compression='gzip')
            else:
                destgrp.require_dataset('data', (nshots, nti, nchan), np.float32, chunks=(1, np.min([nti, 20000]), 1), compression='gzip')
            
            # dt -> s
            dt = ( attrs['dt'][0]*u.Unit(attrs['dt'][1])).to(u.s).value
          
            
            if calibrate:
                 
                 #First calculate the low frequency calibration factors
                 calAx, calAy, calAz = calibrationFactorsLF(attrs)
                
                 
                 #If HF calibration factors are provided, calculate those
                 #calibraton constants too
                 if 'xtau' in attrs.keys() and highfreq_calibrate:
                     calBx, calBy, calBz = calibrationFactorsHF(attrs)
                 else:
                      calBx, calBy, calBz = None,None,None
                      
                      
            #This segment of code checks for bad shots and replaces them with
            #Neighboring good shots
            shotlist = np.arange(nshots)
            if replace_badshots and tdiode_hdf is not None:
                for i in shotlist:
                    if i in badshots:
                        #If the shot is bad, determine the best neighbor shot
                        #to replace it with
                        
                        before_shot = i
                        after_shot = i
                        #Identify nearest good shot before and after
                        while before_shot in badshots:
                            before_shot = before_shot - 1   
                        while after_shot in badshots:
                            after_shot = after_shot + 1
                            

                        #If position data is provided, use that to determine
                        #the best match
                        if  'pos' in srcgrp:
                            before_dist = (np.power(pos[i,0] - pos[before_shot,0],2) + 
                                           np.power(pos[i,1] - pos[before_shot,1],2) + 
                                           np.power(pos[i,2] - pos[before_shot,2],2) )
                            
                            after_dist = (np.power(pos[i,0] - pos[after_shot,0],2) + 
                                           np.power(pos[i,1] - pos[after_shot,1],2) + 
                                           np.power(pos[i,2] - pos[after_shot,2],2) )
                            
                            if before_dist > after_dist:
                                best_match = after_shot
                                
                            else:
                                best_match = before_shot
                        #Otherwise just chose the earlier shot as the default
                        else:
                             best_match = before_shot

                             
                        if verbose:
                            print("Replaced bad shot " + str(i) + " with " + str(best_match))
                        
                        #Actually make the substitution
                        shotlist[i] = best_match
                        

            
            #Initialize time-remaining printout
            tr = util.timeRemaining(nshots)
            
            if verbose:
                print("Beginning processing data shot-by-shot.")


       
            #Chunking data processing loop limits memory usage
            for ind in range(nshots):
                
                #i == ind unless this is a bad shot
                i = shotlist[ind]
                
                
                #Update time remaining
                if verbose:
                        tr.updateTimeRemaining(i)

                #If a tdiode hdf was supplied, calculate the index correction
                #here
                if tdiode_hdf is not None and remove_offset:
                    #Calculate the starting and ending arrays for the data
                    ta = t0indarr[ind] - min_t0ind
                    tb = ta + nti

                    #Calculate the range over which to calculate the offset
                    #for each shot
                    #If offset_rel_t0 is set for either point, add the t0 array
                    if offset_rel_t0[0]:
                        offset_a = offset_range[0] + t0indarr[i] - ta
                    else:
                        offset_a = offset_range[0]
                        
                    if offset_rel_t0[1]:
                        offset_b = offset_range[1] + t0indarr[i] - ta
                    else:
                        offset_b = offset_range[1]
                        
                #added this to deal with cases where you have a timing diode but don't want to remove voltage offset  
                elif tdiode_hdf is not None and remove_offset == False:
                    #Calculate the starting and ending arrays for the data
                    ta = t0indarr[ind] - min_t0ind
                    tb = ta + nti
                    offset_a = offset_range[0]
                    offset_b = offset_range[1]
                    
                else:
                    #By default, read in the entire dataset
                    ta = None
                    tb = None
                    offset_a = offset_range[0]
                    offset_b = offset_range[1]
                    
                if debug:
                    print("Data range: [" + str(ta) + "," + str(tb) + "]")
                    print("Offset range: [" + str(offset_a) + "," + 
                                          str(offset_b) + "]")
                    
                    

                #Read in the data from the source file
                dbx = srcgrp['data'][i,ta:tb, 0]
                dby = srcgrp['data'][i,ta:tb, 1]
                dbz = srcgrp['data'][i,ta:tb, 2]
                
                
                if remove_offset:
                     #Remove offset from each channel
                     dbx = dbx - np.mean(dbx[offset_a:offset_b])
                     dby = dby - np.mean(dby[offset_a:offset_b])
                     dbz = dbz - np.mean(dbz[offset_a:offset_b])
                     
                                
                if integrate:
                     #Intgrate
                     bx = np.cumsum(dbx)*dt
                     by = np.cumsum(dby)*dt
                     bz = np.cumsum(dbz)*dt
                else:
                    bx,by,bz = dbx, dby, dbz
                
                
                if calibrate:
                     #Apply the high-frequency calibration if one was
                     #provided
                     if calBx is not None and highfreq_calibrate:
                          bx = bx + calBx*dbx
                          by = by + calBy*dby
                          bz = bz + calBz*dbz

                     #Apply the low-frequency calibration factors
                     #Probe pol dir is included in these
                     bx = bx*calAx
                     by = by*calAy
                     bz = bz*calAz
                
                
                #If a motion_format is set, apply the appropriate probe angle correction
                if motion_format == 'cartesian' and angle_correction:
                    #Don't need to make any correction
                    pass 
                elif motion_format == 'fixed_pivot' and angle_correction:
                    #x,y,z is the probe's current position
                    x,y,z = srcgrp['pos'][i, :]
                    #rx, ry, rz is the location of the probe rotation point
                    #i.e. the center of the ball valve.
                    rx, ry, rz = attrs['rot_center_x'][0],attrs['rot_center_y'][0],attrs['rot_center_z'][0]
                    #x-rx, y-ry, z-rz is a vector pointing along the probe
                    #shaft towards the probe tip
                    #pitch is the angle of the probe shaft to the xz plane
                    pitch = np.arctan( (y-ry) / (x-rx) )
                    #yaw is the angle of the probe shaft to the xy plane
                    yaw = np.arctan( (z-rz) / (x-rx) )
                    
                    
                    if debug:
                         print("****Fixed Pivot Debug*******")
                         print("(x,y,z) = ({:5.2f},{:5.2f},{:5.2f})".format(x,y,z))
                         print("(rx,ry,rz) = ({:5.2f},{:5.2f},{:5.2f})".format(rx,ry,rz))
                         print("Pitch: " + str(np.degrees(pitch)))
                         print("Yaw: " + str(np.degrees(yaw)))
                    
                    
                    #If the probe is coming from the -X direction, its calibrated Z axis is already off by 180 degrees.
                    #This is because the probes are calibrated to match the East side of LAPD
                    if ((x-rx) > 0.0):
                        yaw = yaw + np.pi
                    
                    #Roll is rotation of the probe about its axis, with
                    #y+ oriented up as roll=0
                    #This should be zero, unless a probe was later discovered
                    #to be incorrectly calibrated, so that the +Y mark was
                    #wrong
                    roll, unit = attrs['roll']
                    if unit != 'rad':
                        np.radians(roll)
              
                    #Matrix is the first Tait-Bryan matrix XZY from https://en.wikipedia.org/wiki/Euler_angles
                    #1 -> roll
                    #2 -> pitch
                    #3 -> yaw
                    bx = (np.cos(pitch)*np.cos(yaw)*bx - 
                        np.sin(pitch)*by  + 
                        np.cos(pitch)*np.sin(yaw)*bz)
                    
                    by =  ((np.sin(roll)*np.sin(yaw) + np.cos(roll)*np.cos(yaw)*np.sin(pitch))*bx +
                           np.cos(roll)*np.cos(pitch)*by  +
                           (np.cos(roll)*np.sin(pitch)*np.sin(yaw) - np.cos(yaw)*np.sin(roll))*bz)
                    
                    bz =  ((np.cos(yaw)*np.sin(roll)*np.sin(pitch) - np.cos(roll)*np.sin(yaw))*bx + 
                           np.cos(pitch)*np.sin(roll)*by  +
                           (np.cos(roll)*np.cos(yaw) + np.sin(roll)*np.sin(pitch)*np.sin(yaw))*bz)
                
                    
                if grid:
                    #Get location to write this datapoint from the shotgridind
                    xi = shotgridind[ind, 0]
                    yi = shotgridind[ind, 1]
                    zi = shotgridind[ind, 2]
                    repi = shotgridind[ind, 3]
                    #Write data
                    try:
                        #print(f"length destgrp selected {len(destgrp['data'][:, xi, yi, zi, repi, 0])}")
                        destgrp['data'][:, xi, yi, zi, repi, 0] = bx
                        destgrp['data'][:, xi, yi, zi, repi, 1] = by
                        destgrp['data'][:, xi, yi, zi, repi, 2] = bz
                    except ValueError as e:
                        print("ERROR!")
                        print(destgrp['data'].shape)
                        print(bx.shape)
                        print([xi, yi, zi, repi])
                        raise(e)
                else:
                    #Write data
                    destgrp['data'][ind,:, 0] = bx
                    destgrp['data'][ind,:, 1] = by 
                    destgrp['data'][ind,:, 2] = bz                      
            

            if verbose:
                print("Writing axes to destination file")
            
            
            #Write the axes as required by the format of the data written
            if motion_format is not None:
                #Add the other axes and things we'd like in this file
                destgrp.require_dataset('pos', (nshots, 3), np.float32, chunks=True)[:] = srcgrp['pos'][0:nshots]
                for k in srcgrp['pos'].attrs.keys():
                    destgrp['pos'].attrs[k] = srcgrp['pos'].attrs[k]

            if grid:
                dimlabels = ['time', 'xaxis', 'yaxis', 'zaxis', 'reps', 'chan']
                
                destgrp.require_dataset('xaxis', (nx,), np.float32, chunks=True)[:] = xaxis
                destgrp['xaxis'].attrs['unit'] = attrs['motion_unit'][0]
                
                destgrp.require_dataset('yaxis', (ny,), np.float32, chunks=True)[:] = yaxis
                destgrp['yaxis'].attrs['unit'] = attrs['motion_unit'][0]
                
                destgrp.require_dataset('zaxis', (nz,), np.float32, chunks=True)[:] = zaxis
                destgrp['zaxis'].attrs['unit'] = attrs['motion_unit'][0]
                
                destgrp.require_dataset('reps', (nreps,), np.int32, chunks=True)[:] = np.arange(nreps)
                destgrp['reps'].attrs['unit'] = ''

            else:
                dimlabels = ['shots', 'time', 'chan']
                
                destgrp.require_dataset('shots', (nshots,), np.int32, chunks=True)[:] = srcgrp['shots'][:]
                destgrp['shots'].attrs['unit'] = srcgrp['shots'].attrs['unit']
            
            
            
            destgrp.require_dataset('chan', (nchan,), np.int32, chunks=True)[:] = srcgrp['chan'][:]
            destgrp['chan'].attrs['unit'] = srcgrp['chan'].attrs['unit']
            
            destgrp.require_dataset('time', (nti,), np.float32, chunks=True)
            destgrp['time'][:] = t
            destgrp['time'].attrs['unit'] = srcgrp['time'].attrs['unit']

           
            if calibrate:
                 destgrp['data'].attrs['unit'] = 'G'
            else:
                 destgrp['data'].attrs['unit'] = 'V'
                 
            destgrp['data'].attrs['dimensions'] = [s.encode('utf-8') for s in dimlabels]
            
            
            del(bx,by,bz)

            if verbose:
                print("End of BDOT routine!")
                
            return True
Exemplo n.º 9
0
def isatRawToFull(src,
                  dest,
                  ti=1.0,
                  mu=4.0,
                  tdiode_hdf=None,
                  grid=False,
                  offset_range=(0, 100),
                  offset_rel_t0=(False, False),
                  verbose=False,
                  debug=False,
                  grid_precision=0.1,
                  strict_grid=False,
                  strict_axes=False):
    """ Converts raw isat Langmuir probe voltages to density and writes the
    result, along with its axes, to a new HDF5 file.

    Channel 0 of each shot is read from the source 'data' array, scaled to a
    density using the probe resistor and area attributes and the ion mass
    number, and written either shot-by-shot or onto a cartesian grid.

    NOTE: any existing file at dest.file is deleted before writing.

    Parameters
    ----------
        src: hdfPath object
            Path string to a raw hdf5 file containing data

        dest: hdfPath object
            Path string to location processed data should be written out

        ti: Ion temperature (eV). Default is 1.0.
            NOTE(review): this argument is accepted but is not referenced
            anywhere in the body of this function; the density formula used
            here has no temperature term.

        mu: Ion mass number (m_i/m_p = mu). Default is 4.0, for Helium.
            Density scales as sqrt(mu).

        tdiode_hdf:  hdfPath object
            Path to an hdf5 file containing the datasets 't0indarr' and
            'goodshots' and a 'dt' attribute. If no HDF file is
            provided, no timing correction will be applied.

        grid: Boolean
            If grid is true, output will be written in cartesian grid array
            format, [nti, nx, ny, nz, nreps]. Otherwise, output will
            be in [nshots, nti] format. Forced to False if the source group
            has no 'pos' array.

        offset_range: tuple
            NOTE(review): accepted but not referenced in the body of this
            function; no offset subtraction is performed here.

        offset_rel_t0: Tuple of booleans
            NOTE(review): accepted but not referenced in the body of this
            function.

        verbose: boolean
            If true, print progress messages and a time-remaining estimate.

        debug: boolean
            If true, print the time index range read for every shot.

        grid_precision: float
            Passed through to postools.grid. Only used when grid is True.

        strict_axes: boolean
            Passed through to postools.grid. Only used when grid is True.

        strict_grid: boolean
            Passed through to postools.grid. Only used when grid is True.


    Returns
    -------
       True (if executes to the end)

    Raises
    ------
        hdftools.hdfGroupExists
            If dest.group is not the root group and already exists in the
            destination file.

        hdftools.hdfDatasetExists
            If the destination group already contains a 'data' dataset.

        ValueError
            Re-raised (after printing the shapes involved) if a shot cannot
            be written into the gridded output array.
    """

    # ******
    # Load data from the raw HDF file
    # ******
    with h5py.File(src.file, 'r') as sf:

        #Get the datagroup
        srcgrp = sf[src.group]

        #Create dictionary of attributes
        attrs = hdftools.readAttrs(srcgrp)

        #Check for keys always required by this function
        req_keys = [
            'area', 'atten', 'gain', 'resistor', 'dir', 'pol',
            'probe_origin_x', 'probe_origin_y', 'probe_origin_z', 'dt'
        ]

        if 'pos' in srcgrp:
            pos = srcgrp['pos'][:]  #Read the entire array in
        else:
            #If no position information is given, a single explicit position
            #is required.
            req_keys = req_keys + ['probe_xpos', 'probe_ypos', 'probe_zpos']
            grid = False  #Can't grid data if there's no pos array!

        #Process the required keys, throwing an error if any cannot be found
        csvtools.missingKeys(attrs, req_keys, fatal_error=True)

        #Extract the shape of the source data
        nshots, nti, nchan = srcgrp['data'].shape

        #If requested by keyword, apply gridding
        #(note that this call also replaces nshots)
        if grid:
            shotgridind, xaxis, yaxis, zaxis, nx, ny, nz, nreps, nshots = postools.grid(
                pos,
                attrs,
                strict_axes=strict_axes,
                strict_grid=strict_grid,
                grid_precision=grid_precision,
                invert=False)

        if verbose:
            print("Opening destination HDF file")

        #Create the destination file directory if necessary
        hdftools.requireDirs(dest.file)

        #Open the destination file
        #This exists WITHIN the open statement for the source file, so the
        #source file is open at the same time.

        #remove files if they already exist
        if os.path.exists(dest.file):
            os.remove(dest.file)

        with h5py.File(dest.file, 'a') as df:

            #Throw an error if this group already exists
            #Strings must be compared by value (!=): an identity test
            #against a literal depends on interning and is not reliable.
            if dest.group != '/' and dest.group in df.keys():
                raise hdftools.hdfGroupExists(dest)

            destgrp = df.require_group(dest.group)

            #Copy over attributes
            hdftools.copyAttrs(srcgrp, destgrp)

            #Load the time vector
            t = srcgrp['time']

            #If tdiode_hdf is set, load the pre-processed tdiode data
            if tdiode_hdf is not None:
                if verbose:
                    print("Loading tdiode array from file.")
                #Use a distinct handle name so the source file handle (sf)
                #is not rebound while it is still in use.
                with h5py.File(tdiode_hdf.file, 'r') as tf:
                    grp = tf[tdiode_hdf.group]
                    t0indarr = grp['t0indarr'][:]
                    goodshots = grp['goodshots'][:]
                    tdiode_attrs = hdftools.readAttrs(grp)

                #If tdiode was digitized with a different dt, this correction
                #will be necessary
                dt_ratio = float(attrs['dt'][0]) / float(tdiode_attrs['dt'][0])
                t0indarr = (t0indarr / dt_ratio).astype(np.int32)

                #We will remove up to max_t0shift indices from each array such that
                #the t0 indices all line up.
                min_t0ind = np.min(t0indarr[goodshots])
                max_t0shift = np.max(t0indarr[goodshots]) - min_t0ind
                #Compute new nti
                nti = nti - max_t0shift

                #Truncate the time vector and make it relative to the
                #earliest good-shot t0
                t = t[0:nti] - t[min_t0ind]

            #Throw an error if this dataset already exists
            if 'data' in destgrp.keys():
                raise hdftools.hdfDatasetExists(str(dest) + ' -> ' + "'data'")

            #Create the dataset 'data' appropriate to whether or not output
            #data will be gridded
            if verbose:
                print("Creating 'data' group in destination file")

            #Chunk along the time axis only, capped at 20000 samples
            time_chunk = int(min(nti, 20000))
            if grid:
                destgrp.require_dataset('data', (nti, nx, ny, nz, nreps),
                                        np.float32,
                                        chunks=(time_chunk, 1, 1, 1, 1),
                                        compression='gzip')
            else:
                destgrp.require_dataset('data', (nshots, nti),
                                        np.float32,
                                        chunks=(1, time_chunk),
                                        compression='gzip')

            # dt -> s
            dt = (attrs['dt'][0] * u.Unit(attrs['dt'][1])).to(u.s).value

            resistor = float(attrs['resistor'][0])  #Ohms
            # area -> m^2
            area = (attrs['area'][0] * u.Unit(attrs['area'][1])).to(
                u.m**2).value

            #Initialize time-remaining printout
            tr = util.timeRemaining(nshots)

            if verbose:
                print("Beginning processing data shot-by-shot.")

            #Chunking data processing loop limits memory usage
            for i in range(nshots):

                #Update time remaining
                if verbose:
                    tr.updateTimeRemaining(i)

                #If a tdiode hdf was supplied, calculate the index correction
                #here
                if tdiode_hdf is not None:
                    #Calculate the starting and ending indices for the data
                    ta = t0indarr[i] - min_t0ind
                    tb = ta + nti

                else:
                    #By default, read in the entire dataset
                    ta = None
                    tb = None

                if debug:
                    print("Data range: [" + str(ta) + "," + str(tb) + "]")

                #Read in the data from the source file (channel 0 only)
                voltage = srcgrp['data'][i, ta:tb, 0]

                #Calculate density
                #Equation is 2 from this paper: 10.1119/1.2772282
                #This is valid for the regime Te~Ti, which is approx true in
                #LAPD
                density = 1.6e9 * np.sqrt(mu) * voltage / (resistor * area
                                                           )  #cm^-3

                if grid:
                    #Get location to write this datapoint from the shotgridind
                    xi = shotgridind[i, 0]
                    yi = shotgridind[i, 1]
                    zi = shotgridind[i, 2]
                    repi = shotgridind[i, 3]
                    #Write data
                    try:
                        destgrp['data'][:, xi, yi, zi, repi] = density

                    except ValueError:
                        #Print the shapes involved to help diagnose the
                        #mismatch, then propagate the original error.
                        print("ERROR!")
                        print(destgrp['data'].shape)
                        print(voltage.shape)
                        print([xi, yi, zi, repi])
                        raise
                else:
                    #Write data
                    destgrp['data'][i, :] = density

            if verbose:
                print("Writing axes to destination file")

            if grid:
                #Add the other axes and things we'd like in this file
                destgrp.require_dataset(
                    'pos', (nshots, 3), np.float32,
                    chunks=True)[:] = srcgrp['pos'][0:nshots]
                for k in srcgrp['pos'].attrs.keys():
                    destgrp['pos'].attrs[k] = srcgrp['pos'].attrs[k]

                dimlabels = ['time', 'xaxis', 'yaxis', 'zaxis', 'reps']

                destgrp.require_dataset('xaxis', (nx, ),
                                        np.float32,
                                        chunks=True)[:] = xaxis
                destgrp['xaxis'].attrs['unit'] = attrs['motion_unit'][0]

                destgrp.require_dataset('yaxis', (ny, ),
                                        np.float32,
                                        chunks=True)[:] = yaxis
                destgrp['yaxis'].attrs['unit'] = attrs['motion_unit'][0]

                destgrp.require_dataset('zaxis', (nz, ),
                                        np.float32,
                                        chunks=True)[:] = zaxis
                destgrp['zaxis'].attrs['unit'] = attrs['motion_unit'][0]

                destgrp.require_dataset('reps', (nreps, ),
                                        np.int32,
                                        chunks=True)[:] = np.arange(nreps)
                destgrp['reps'].attrs['unit'] = ''

            else:
                dimlabels = ['shots', 'time']

                destgrp.require_dataset('shots', (nshots, ),
                                        np.int32,
                                        chunks=True)[:] = srcgrp['shots'][:]
                destgrp['shots'].attrs['unit'] = srcgrp['shots'].attrs['unit']

            destgrp.require_dataset('time', (nti, ), np.float32, chunks=True)
            destgrp['time'][:] = t
            destgrp['time'].attrs['unit'] = srcgrp['time'].attrs['unit']

            destgrp['data'].attrs['unit'] = 'cm^{-3}'

            destgrp['data'].attrs['dimensions'] = [
                s.encode('utf-8') for s in dimlabels
            ]

            if verbose:
                print("End of isat Langmuir routine!")

            return True
Exemplo n.º 10
0
def vsweepLangmuirRawToFull(src,
                            ndest,
                            tdest,
                            grid=True,
                            verbose=False,
                            plots=False,
                            debug=False,
                            grid_precision=0.1,
                            strict_grid=False,
                            strict_axes=False):
    """ Fits swept Langmuir probe data and creates two full save files
    containing the calculated density and temperature

    Any file that already exists at ndest.file or tdest.file is deleted
    before the new output is written.

    Parameters
    ----------
        src: hdfPath object
            Path string to a raw hdf5 file containing swept Langmuir probe data
            There should be two channels: the first being the Langmuir current
            and the second being the ramp voltage.

        ndest: hdfPath object
            Path string to location processed density data is written out

        tdest: hdfPath object
            Path string to location processed temperature data is written out

        grid: Boolean
            If grid is true, shots taken at the same grid position are
            averaged and the output is written in cartesian grid array
            format, [nti, nx, ny, nz]. Otherwise, all shots are averaged
            together and the output has shape [nti]. Here nti is the number
            of sweeps found in the ramp. Gridding is disabled automatically
            if the source group has no 'pos' array.

        verbose: boolean
            Set this flag to true to enable print statements and the
            time-remaining printout.

        plots: boolean
            Passed on to find_sweeps.

        debug: boolean
            Currently unused by this function.

        grid_precision: float
            This is the precision to which position values will be rounded
            before being fit onto the grid. Only applies to fuzzy axis and grid
            creation.

        strict_axes: boolean
            If true, attempt to calculate axes from saved grid parameters.
            Default is false, which attempts to calculate axes by looking at
            position values.

        strict_grid: boolean
            If true, strictly unravel data onto the axes, assuming the probe
            moved in order reps->X->Y->Z. This will NOT correctly handle
            points where the probe was not at the requested position. Default
            is false, which applies "fuzzy gridding", which tries to find the
            best grid position for each shot individually.


    Returns
    -------
       True (if executes to the end)
    """

    # ******
    # Load data from the raw HDF file
    # ******
    with h5py.File(src.file, 'r') as sf:

        #Get the datagroup
        srcgrp = sf[src.group]

        #Create dictionary of attributes
        attrs = hdftools.readAttrs(srcgrp)

        #Check for keys always required by this function
        req_keys = [
            'area', 'resistor', 'gain', 'atten', 'ramp_gain', 'ramp_atten',
            'probe_origin_x', 'probe_origin_y', 'probe_origin_z'
        ]

        if 'pos' in srcgrp:
            pos = srcgrp['pos'][:]  #Read the entire array in

        else:
            #If no position information is given, a single explicit position
            #is required.
            req_keys = req_keys + ['xpos', 'ypos', 'zpos']
            grid = False  #Can't grid data if there's no pos array!

        #Process the required keys, throwing an error if any cannot be found
        csvtools.missingKeys(attrs, req_keys, fatal_error=True)

        #Extract the shape of the source data
        nshots, nti, nchan = srcgrp['data'].shape

        #If requested by keyword, apply gridding
        if grid:
            shotlist, xaxis, yaxis, zaxis, nx, ny, nz, nreps, nshots = postools.grid(
                pos,
                attrs,
                strict_axes=strict_axes,
                strict_grid=strict_grid,
                grid_precision=grid_precision,
                invert=True)

        if verbose:
            print("Opening destination HDF files")

        #Create the destination file directory if necessary
        #hdftools.requireDirs(ndest.file)
        #hdftools.requireDirs(tdest.file)

        #Check if the output files exist already: if so, delete them
        if os.path.exists(ndest.file):
            os.remove(ndest.file)
        if os.path.exists(tdest.file):
            os.remove(tdest.file)

        #Open the destination files
        #This exists WITHIN the open statement for the source file, so the
        #source file is open at the same time.
        with h5py.File(ndest.file, 'a') as ndf:
            with h5py.File(tdest.file, 'a') as tdf:

                #Throw an error if this group already exists
                if ndest.group != '/' and ndest.group in ndf.keys():
                    raise hdftools.hdfGroupExists(ndest)
                if tdest.group != '/' and tdest.group in tdf.keys():
                    raise hdftools.hdfGroupExists(tdest)

                ndestgrp = ndf.require_group(ndest.group)
                tdestgrp = tdf.require_group(tdest.group)

                grps = [ndestgrp, tdestgrp]

                for grp in grps:
                    hdftools.copyAttrs(srcgrp, grp)
                    #Throw an error if this dataset already exists
                    if 'data' in grp.keys():
                        raise hdftools.hdfDatasetExists(
                            str(grp) + ' -> ' + "'data'")

                #Determine the time vector and nti
                #Assume first shot is representative of the vramp
                vramp = srcgrp['data'][0, :, 1]
                time = srcgrp['time'][:]
                peaktimes, start, end = find_sweeps(time, vramp, plots=plots)
                #From here on nti counts sweeps, not raw time samples
                nti = len(peaktimes)
                time = peaktimes

                #Create the datasets 'data' and 'error' appropriate to
                #whether or not output data will be gridded
                if verbose:
                    print("Creating 'data' group in destination file")
                if grid:
                    data_shape = (nti, nx, ny, nz)
                else:
                    data_shape = (nti, )
                #The error dataset stores 5 values per fitted sweep
                error_shape = data_shape + (5, )

                for grp in grps:
                    grp.require_dataset('data',
                                        data_shape,
                                        np.float32,
                                        chunks=True,
                                        compression='gzip')
                    grp.require_dataset('error',
                                        error_shape,
                                        np.float32,
                                        chunks=True,
                                        compression='gzip')

                probe_gain = float(attrs['gain'][0])
                probe_atten = float(attrs['atten'][0])
                ramp_gain = float(attrs['ramp_gain'][0])
                ramp_atten = float(attrs['ramp_atten'][0])

                resistor = float(attrs['resistor'][0])  #Ohms
                area = (attrs['area'][0] * u.Unit(attrs['area'][1])).to(
                    u.cm**2).value

                #Attenuation is treated as dB (10^(atten/20)), divided by gain
                probe_calib = np.power(10, probe_atten / 20.0) / probe_gain
                ramp_calib = np.power(10, ramp_atten / 20.0) / ramp_gain

                if grid:
                    #Initialize time-remaining printout
                    tr = util.timeRemaining(nx * ny * nz, reportevery=20)
                    for xi in range(nx):
                        for yi in range(ny):
                            for zi in range(nz):
                                #Flattened position index, used only for
                                #the progress printout
                                i = zi + yi * nz + xi * nz * ny

                                if verbose:
                                    tr.updateTimeRemaining(i)

                                #Shots recorded at this grid position
                                s = shotlist[xi, yi, zi, :]

                                current = srcgrp['data'][
                                    s, :, 0] * probe_calib / resistor
                                vramp = srcgrp['data'][s, :, 1] * ramp_calib

                                #Average over shots
                                current = np.mean(current, axis=0)
                                vramp = np.mean(vramp, axis=0)

                                for ti in range(nti):
                                    a = int(start[ti])
                                    b = int(end[ti])

                                    vpp, kTe, esat, vthe, density, error = vsweep_fit(
                                        vramp[a:b],
                                        current[a:b],
                                        esat_range=None,
                                        exp_range=None,
                                        plots=False,
                                        area=area)

                                    ndestgrp['data'][ti, xi, yi, zi] = density
                                    ndestgrp['error'][ti, xi, yi,
                                                      zi, :] = error

                                    tdestgrp['data'][ti, xi, yi, zi] = kTe
                                    tdestgrp['error'][ti, xi, yi,
                                                      zi, :] = error

                else:  #Not gridded
                    #Average over every shot in the file
                    current = srcgrp['data'][:, :, 0]
                    current = np.mean(current, axis=0) * probe_calib / resistor
                    vramp = srcgrp['data'][:, :, 1]
                    vramp = np.mean(vramp, axis=0) * ramp_calib

                    for ti in range(nti):
                        #Cast to int exactly as the gridded branch does, so
                        #that the sweep bounds are valid slice indices
                        a = int(start[ti])
                        b = int(end[ti])
                        vpp, kTe, esat, vthe, density, error = vsweep_fit(
                            vramp[a:b],
                            current[a:b],
                            esat_range=None,
                            exp_range=None,
                            plots=False,
                            area=area)

                        ndestgrp['data'][ti] = density
                        ndestgrp['error'][ti, :] = error

                        tdestgrp['data'][ti] = kTe
                        tdestgrp['error'][ti, :] = error

                for grp in grps:
                    #Write the axes as required by the format of the data written
                    if grid:
                        grp.require_dataset(
                            'pos', (nshots, 3), np.float32,
                            chunks=True)[:] = srcgrp['pos'][0:nshots]
                        for k in srcgrp['pos'].attrs.keys():
                            grp['pos'].attrs[k] = srcgrp['pos'].attrs[k]

                        dimlabels = ['time', 'xaxis', 'yaxis', 'zaxis']
                        grp.require_dataset('xaxis', (nx, ),
                                            np.float32,
                                            chunks=True)[:] = xaxis
                        grp['xaxis'].attrs['unit'] = attrs['motion_unit'][0]

                        grp.require_dataset('yaxis', (ny, ),
                                            np.float32,
                                            chunks=True)[:] = yaxis
                        grp['yaxis'].attrs['unit'] = attrs['motion_unit'][0]

                        grp.require_dataset('zaxis', (nz, ),
                                            np.float32,
                                            chunks=True)[:] = zaxis
                        grp['zaxis'].attrs['unit'] = attrs['motion_unit'][0]
                    else:
                        dimlabels = ['time']

                    grp.require_dataset('time', (nti, ),
                                        np.float32,
                                        chunks=True)
                    grp['time'][:] = time
                    grp['time'].attrs['unit'] = srcgrp['time'].attrs['unit']

                #Density and temperature each get their own unit
                ndestgrp['data'].attrs['unit'] = 'cm^{-3}'
                tdestgrp['data'].attrs['unit'] = 'eV'

                ndestgrp['data'].attrs['dimensions'] = [
                    s.encode('utf-8') for s in dimlabels
                ]
                tdestgrp['data'].attrs['dimensions'] = [
                    s.encode('utf-8') for s in dimlabels
                ]

            if verbose:
                print("End of Sweept Langmuir routine!")

            return True
Exemplo n.º 11
0
def imgSeqRawToFull(src, dest):
    """ Reorganizes a raw sequence of image frames into a full save file
    with separate time and repetition dimensions.

    The source 'data' array is read as [nframes, nxpx, nypx, nchan]. Each
    consecutive run of nreps frames is treated as the repetitions of one
    time step, and the output 'data' array is written as
    [nti, nxpx, nypx, nreps, nchan], where nti = nframes // nreps.

    Parameters
    ----------
        src: hdfPath object
            Path string to the raw hdf5 file containing the image sequence.
            The attribute 'dt' is required. The attributes 'nreps', 't0',
            'camera_delay', 'dxdp', 'dydp', 'x0px' and 'y0px' are optional
            and fall back to defaults when missing or NaN.

        dest: hdfPath object
            Path string to location data should be written out

    Returns
    -------
       None
    """
    with h5py.File(src.file, 'r') as sf:

        #Get the datagroup
        srcgrp = sf[src.group]

        #Create dictionary of attributes
        attrs = hdftools.readAttrs(srcgrp)

        #Check for keys always required by this function
        req_keys = ['dt']

        csvtools.missingKeys(attrs, req_keys, fatal_error=True)

        def _optional_quantity(key, unit, default):
            #Return attrs[key] converted to the given unit, or the default
            #if the key is absent or its value is NaN
            if key in attrs.keys() and not np.isnan(attrs[key][0]):
                return (attrs[key][0] * u.Unit(attrs[key][1])).to(unit).value
            return default

        nframes, nxpx, nypx, nchan = srcgrp['data'].shape

        #Convert dt
        dt = (attrs['dt'][0] * u.Unit(attrs['dt'][1])).to(u.s).value

        #Reps is assumed to be 1 unless otherwise set
        #Cast to int: the value is used as a dataset dimension and as a
        #slice bound below, neither of which accepts a float
        if 'nreps' in attrs.keys() and not np.isnan(attrs['nreps'][0]):
            nreps = int(attrs['nreps'][0])
        else:
            nreps = 1

        nti = nframes // nreps

        #t0 is the time of the first frame in the set
        t0 = _optional_quantity('t0', u.s, 0)

        #Laser t0 is the time when the laser fires
        #Time array will be shifted so this time is zero
        camera_delay = _optional_quantity('camera_delay', u.s, 0)

        #dxdp, dydp are the pixel spacings in cm/px. None means no
        #calibration is available, in which case pixel axes are written.
        dxdp = _optional_quantity('dxdp', u.cm, None)
        dydp = _optional_quantity('dydp', u.cm, None)

        #Offsets subtracted from the pixel index before scaling
        x0px = _optional_quantity('x0px', u.cm, 0)
        y0px = _optional_quantity('y0px', u.cm, 0)

        with h5py.File(dest.file, 'a') as df:
            destgrp = df.require_group(dest.group)

            #One chunk per (time step, rep, channel) image
            destgrp.require_dataset("data", (nti, nxpx, nypx, nreps, nchan),
                                    np.float32,
                                    chunks=(1, nxpx, nypx, 1, 1),
                                    compression='gzip')
            destgrp['data'].attrs['unit'] = ''

            #Initialize time-remaining printout
            tr = util.timeRemaining(nti, reportevery=5)

            #Actually put the images into the file
            for i in range(nti):
                tr.updateTimeRemaining(i)

                #Frames a:b are the nreps repetitions of time step i
                a = i * nreps
                b = (i + 1) * nreps

                #Copy, re-shape, and write data to array
                arr = srcgrp['data'][a:b, ...]

                #[nreps, nxpx, nypx, nchan] -> [nxpx, nypx, nreps, nchan]
                arr = np.moveaxis(arr, 0, 2)

                destgrp['data'][i, ...] = arr

            #Write the attrs dictionary into attributes of the new data group
            hdftools.writeAttrs(attrs, destgrp)

            dimlabels = []

            time = np.arange(nti) * dt + camera_delay - t0
            destgrp.require_dataset('time', (nti, ), np.float32,
                                    chunks=True)[:] = time
            destgrp['time'].attrs['unit'] = 's'
            dimlabels.append('time')

            if dxdp is not None:
                xaxis = (np.arange(nxpx) - x0px) * dxdp
                destgrp.require_dataset('xaxis', (nxpx, ),
                                        np.float32,
                                        chunks=True)[:] = xaxis
                destgrp['xaxis'].attrs['unit'] = 'cm'
                dimlabels.append('xaxis')
            else:
                destgrp.require_dataset('xpixels', (nxpx, ),
                                        np.float32,
                                        chunks=True)[:] = np.arange(nxpx)
                destgrp['xpixels'].attrs['unit'] = ''
                dimlabels.append('xpixels')

            if dydp is not None:
                yaxis = (np.arange(nypx) - y0px) * dydp
                destgrp.require_dataset('yaxis', (nypx, ),
                                        np.float32,
                                        chunks=True)[:] = yaxis
                destgrp['yaxis'].attrs['unit'] = 'cm'
                dimlabels.append('yaxis')
            else:
                destgrp.require_dataset('ypixels', (nypx, ),
                                        np.float32,
                                        chunks=True)[:] = np.arange(nypx)
                destgrp['ypixels'].attrs['unit'] = ''
                dimlabels.append('ypixels')

            destgrp.require_dataset('reps', (nreps, ), np.float32,
                                    chunks=True)[:] = np.arange(nreps)
            destgrp['reps'].attrs['unit'] = ''
            dimlabels.append('reps')

            destgrp.require_dataset('chan', (nchan, ), np.float32,
                                    chunks=True)[:] = np.arange(nchan)
            destgrp['chan'].attrs['unit'] = ''
            dimlabels.append('chan')

            destgrp['data'].attrs['dimensions'] = [
                s.encode('utf-8') for s in dimlabels
            ]
Exemplo n.º 12
0
def lapdToRaw( run, probe, hdf_dir, csv_dir, dest, verbose=False,
               trange=(0, -1)):
    """ Retrieves the appropriate metadata for a run and probe in a given data
    directory, then reads in the data using the bapsflib module and saves
    it in a new hdf5 file.
    
    Parameters
    ----------
        run: int
            Run number
        
        probe: str
            Probe name
            
        hdf_dir: str (path)
            Path to the directory where HDF files are stored
            
        csv_dir: str(path)
            Path to the directory where metadata CSV's are stored
    

        dest: hdfPath object
            Path string to location data should be written out

        verbose: boolean
            Set this flag to true to enable print statements throughout the
            code, including a runtime-until-completion estimate during the
            data reading loop.
            
        trange: [start_index, end_index]
            Time range IN INDICES over which to load the data. -1 in the second
            index will be translated to nti-1. The end index is exclusive:
            samples start_index:end_index are read.

    Returns
    -------
       dest (the hdfPath object the data was written to)
    """ 

    #Create a dictionary of attributes from the entire directory of CSV
    #files that applies to this probe and run
    attrs = csvtools.getAllAttrs(csv_dir, run, probe)
  
    #Check that some required keys are present, throw a fatal error if not
    req_keys = ['datafile', 'digitizer', 'adc']
    csvtools.missingKeys(attrs, req_keys, fatal_error=True)
        
    #Load some digitizer parameters we now know exist
    digitizer = attrs['digitizer'][0]
    adc = attrs['adc'][0]
    #TODO: Should this file take a data_dir and determine the filename
    #automatically, or should a source hdf file be given, leaving the program
    #that calls this one to determine the HDF file name?
    src =  os.path.join(hdf_dir,  attrs['datafile'][0] +  '.hdf5')
    
 
    #Create an array of channels (required input for bapsflib read_data)
    # channel_arr = array of tuples of form: (digitizer, adc, board#, channel#)
    # eg. channel_arr = ('SIS crate', 'SIS 3305', 2, 1)
    #Do this in a loop, so the number of channels is flexible
    #However, the number of 'brd' and 'chan' fields MUST match
    #AND, the keys must be of the format 'brd1', 'chan1', etc.
    channel_arr = []
    nchan = 1
    while True:
        brdstr = 'brd' + str(int(nchan))
        chanstr = 'chan' + str(int(nchan))
        if brdstr in attrs.keys() and chanstr in attrs.keys():
            #Check to make sure channel has actual non-nan values
            if not np.isnan(attrs[brdstr][0])  and not np.isnan(attrs[chanstr][0]):
                #Append the channel to the list to be extracted
                channel_arr.append( (digitizer, adc, attrs[brdstr][0], attrs[chanstr][0]) )
            nchan = nchan + 1
        else:
            break
    #Determine the number of channels from the channel array
    nchan = len(channel_arr)
        
    #Read some variables from the src file
    with bapsf_lapd.File(src, silent=True)  as sf:
        src_digitizers = sf.digitizers

        #Assume this is the digitizer: it is the only one
        #NOTE: this is hard-coded and does not use attrs['digitizer']
        digi = src_digitizers['SIS crate']
        #Assume the adc, nti, etc. are all the same on all the channels.
        
        #This line assumes that only one configuration is being used
        #This is usually the case: if it is not, changes need to be made
        daq_config = digi.active_configs[0]
        
        name, info = digi.construct_dataset_name(channel_arr[0][2], 
                                                 channel_arr[0][3], 
                                                 adc=channel_arr[0][1],
                                                 config_name = daq_config,
                                                 return_info=True)
        #Read out some digitizer parameters
        nshots = info['nshotnum']
        nti = info['nt']
        
        #Translate the requested time range into start/end indices
        sti = trange[0]
        if trange[1] == -1:
            eti = nti-1
        else:
            eti = trange[1]
        
        #From here on nti is the number of samples actually copied
        nti = eti- sti

    
    #Check if keys are provided to specify a motion list
    # control = array of tuples of form (motion control, receptacle)
    # eg. controls = [('6K Compumotor', receptacle)]
    # note that 'receptacle' here is the receptacle NUMBER, 1 - indexed!)
    req_keys = ['motion_controller', 'motion_receptacle']    
    if csvtools.missingKeys(attrs, req_keys, fatal_error = False):
        print("Some motion keys not found: positon data will not be read out!")
        controls, pos = None, None
    else:
        motion_controller = attrs['motion_controller'][0]
        motion_receptacle = attrs['motion_receptacle'][0]
        controls = [(motion_controller, motion_receptacle)]
        
        #Only read positions for motion controllers this code supports.
        #For any other controller no position array is written.
        if motion_controller in ['6K Compumotor', 'NI_XZ', 'NI_XYZ']:
            pos, attrs = readPosArray(src, controls, attrs)
        else:
            controls, pos = None, None
    
    #Create the destination file directory if necessary
    hdftools.requireDirs(dest.file)
    #Create the destination file
    with h5py.File(dest.file, "a") as df:

        #Create the dest group, throw error if it exists
        #Compare by value: 'is not' on a string literal tests identity
        if dest.group != '/' and dest.group in df.keys():
            raise hdftools.hdfGroupExists(dest)
        #require_group creates the group when it is missing; plain indexing
        #would raise KeyError for every group that does not exist yet
        grp = df.require_group(dest.group)
        
        #Write the attrs dictionary into attributes of the new data group
        hdftools.writeAttrs(attrs, grp)

        #Open the LAPD file and copy the data over
        with bapsf_lapd.File(src, silent=True) as sf:
            
            #Initialize the output data array
            if 'data' in grp.keys():
                raise hdftools.hdfDatasetExists(str(dest) + ' -> ' + "'data'")

            #Create the dataset + associated attributes
            grp.require_dataset("data", (nshots, nti, nchan), np.float32, 
                                chunks=(1, np.min([nti, 20000]), 1), 
                                compression='gzip')
            grp['data'].attrs['unit'] = 'V'
            

            dimlabels = ['shots', 'time', 'chan']
            
            grp['data'].attrs['dimensions'] = [s.encode('utf-8') for s in dimlabels]
                

            #Initialize time-remaining printout
            tr = util.timeRemaining(nchan*nshots)
            
            #Loop through the channels and shots, reading one-by-one into the
            #output dataset
            for chan in range(nchan):
                channel = channel_arr[chan]
                if verbose:
                    print("Reading channel: " + str(chan+1) + '/' + str(nchan))

                for shot in range(nshots):
                    
                    if verbose:
                        tr.updateTimeRemaining(nshots*chan + shot)
                    
                    #Read the data through bapsflib (shotnum is passed
                    #1-indexed)
                    data = sf.read_data(channel[2], channel[3], digitizer =channel[0],
                                        adc = channel[1], config_name = daq_config, 
                                        silent=True, shotnum=shot+1)
                    
            
                    grp['data'][shot,:,chan] = data['signal'][0, sti:eti]
                    
                    if shot == 0:
                        dt = data.dt #Adjusted in bapsflib for clock rate, avging, etc.
                        
                        grp.attrs['dt'] = [s.encode('utf-8') for s 
                                 in [str(dt.value), str(dt.unit)] ]
    
                

        #If applicable, write the pos array to file
        if pos is not None:
            grp.require_dataset('pos', (nshots, 3), np.float32)[:] = pos
            del pos
            
        
        #Create the axes
        grp.require_dataset('shots', (nshots,), np.float32, chunks=True )[:] = np.arange(nshots)
        grp['shots'].attrs['unit'] = ''
        
        t = np.arange(nti)*dt
        grp.require_dataset('time', (nti,), np.float32, chunks=True)[:] = t.value
        grp['time'].attrs['unit'] =  str(t.unit)
        
        grp.require_dataset('chan', (nchan,), np.float32, chunks=True)[:] = np.arange(nchan)
        grp['chan'].attrs['unit'] = ''
        
   
    return dest
Exemplo n.º 13
0
def hrrToRaw(run, probe, hdf_dir, csv_dir, dest, verbose=False, debug=False):
    """ Retrieves the appropriate metadata for a run and probe in a given data
    directory, then reads in the data from the HRR hdf5 output file and
    writes it, with its axes and attributes, to the destination.

    Parameters
    ----------
        run: int
            Run number

        probe: str
            Probe name

        hdf_dir: str (path)
            Path to the directory where HDF files are stored

        csv_dir: str (path)
            Path to the directory where metadata CSV's are stored

        dest: hdfPath object
            Path string to location data should be written out. Its
            'file' and 'group' attributes are used.

        verbose: boolean
            Set this flag to true to enable print statements throughout the
            code, including a runtime-until-completion estimate during the
            data reading loop.

        debug: boolean
            Set this flag to true to print the number of data and position
            channels found in the CSV metadata and the name of the first
            resource group read from the source file.

    Returns
    -------
       dest (Filepath to destination file)

    Raises
    ------
        ValueError
            If no valid data channels are listed in the CSV metadata, or
            the RESOURCE TYPE of the first channel's resource is neither
            'SCOPE' nor 'MOTOR BOARD'.

        KeyError
            If a position channel listed in the CSV metadata has no
            'POSITION' dataset in the source file.

        hdftools.hdfGroupExists
            If dest.group is not '/' and already exists in the file.

        hdftools.hdfDatasetExists
            If the destination group already contains a 'data' dataset.
    """

    #Create a dictionary of attributes from the entire directory of CSV
    #files that applies to this probe and run
    attrs = csvtools.getAllAttrs(csv_dir, run, probe)

    #Check that some required keys are present, throw a fatal error if not
    req_keys = ['datafile']
    csvtools.missingKeys(attrs, req_keys, fatal_error=True)

    #TODO: Should this file take a data_dir and determine the filename
    #automatically, or should a source hdf file be given, leaving the program
    #that calls this one to determine the HDF file name?
    src = os.path.join(hdf_dir, attrs['datafile'][0] + '.hdf5')

    #Create an array of channels
    #channel_arr = tuples of form (resource number, channel number)
    #Indexed from 1, to match load/LAPD.py
    channel_arr = []
    nchan = 1
    while True:
        digistr = 'resource' + str(nchan)
        chanstr = 'chan' + str(nchan)

        #Stop at the first index that is not fully described in the csv
        if chanstr not in attrs or digistr not in attrs:
            break

        resource, channel = attrs[digistr][0], attrs[chanstr][0]
        #Only extract channels that have actual non-nan values
        if not np.isnan(resource) and not np.isnan(channel):
            channel_arr.append((resource, channel))
        nchan += 1

    if debug:
        print("{:.0f} Data Channels found in csv".format(len(channel_arr)))

    #Create a dictionary of position channels
    #pos_chan[axis] = tuple of form (resource number, channel number), or
    #None if that axis has no valid entry in the csv
    ax = ('x', 'y', 'z')
    pos_chan = {}
    for a in ax:
        digistr = a + 'pos_resource'
        chanstr = a + 'pos_chan'
        pos_chan[a] = None
        if chanstr in attrs and digistr in attrs:
            resource, channel = attrs[digistr][0], attrs[chanstr][0]
            #Only keep channels that have actual non-nan values
            if not np.isnan(resource) and not np.isnan(channel):
                pos_chan[a] = (resource, channel)

    has_pos = any(pc is not None for pc in pos_chan.values())

    if debug:
        #Count only the axes that actually have a channel assigned
        npos_chan = sum(pc is not None for pc in pos_chan.values())
        print("{:.0f} Pos Channels found in csv".format(npos_chan))

    #Determine the number of channels from the channel array
    nchan = len(channel_arr)
    if nchan == 0:
        raise ValueError(
            "No valid data channels (resourceN/chanN) found in csv for "
            "run " + str(run) + ", probe " + str(probe))

    #Read some variables from the src file
    with h5py.File(src, 'r') as sf:

        digi_name = 'RESOURCE ' + str(channel_arr[0][0])
        if debug:
            print(digi_name)
        digigrp = sf[digi_name]

        resource_type = digigrp.attrs['RESOURCE TYPE'].decode('utf-8')

        attrs['RESOURCE ALIAS'] = (
            digigrp.attrs['RESOURCE ALIAS'].decode('utf-8'), '')
        attrs['RESOURCE DESCRIPTION'] = (
            digigrp.attrs['RESOURCE DESCRIPTION'].decode('utf-8'), '')
        attrs['RESOURCE ID'] = (digigrp.attrs['RESOURCE ID'], '')
        attrs['RESOURCE MODEL'] = (
            digigrp.attrs['RESOURCE MODEL'].decode('utf-8'), '')
        attrs['RESOURCE TYPE'] = (resource_type, '')

        first_chan = digigrp['CHANNEL 0']
        resource_unit = first_chan['UNITS'][0].decode('utf-8')

        attrs['motion_unit'] = ('mm', '')

        if resource_type == 'SCOPE':
            dataname = 'TRACE'
            nshots = first_chan[dataname].shape[0]
            nti = first_chan[dataname].shape[1]
            dt = first_chan[dataname].attrs['WAVEFORM DT'] * u.s

            attrs['dt'] = [str(dt.value), str(dt.unit)]

        elif resource_type == 'MOTOR BOARD':
            dataname = 'POSITION'
            nshots = first_chan[dataname].shape[0]
            #One value per shot: use a time axis of length one
            nti = 1

        else:
            #Fail here rather than with a NameError further down
            raise ValueError("Unsupported RESOURCE TYPE '" + resource_type +
                             "' for " + digi_name)

    #Create the destination file
    with h5py.File(dest.file, "a") as df:

        #Create the dest group, throw error if it exists
        #(the root group always exists, so it is exempt from the check)
        if dest.group == '/':
            grp = df['/']
        else:
            if dest.group in df.keys():
                raise hdftools.hdfGroupExists(dest)
            grp = df.require_group(dest.group)

        #Initialize the output data array
        if 'data' in grp.keys():
            raise hdftools.hdfDatasetExists(str(dest) + ' -> ' + "'data'")

        #Create the dataset + associated attributes
        grp.require_dataset("data", (nshots, nti, nchan),
                            np.float32,
                            chunks=(1, min(nti, 20000), 1),
                            compression='gzip')
        grp['data'].attrs['unit'] = resource_unit

        dimlabels = ['shots', 'time', 'chan']

        grp['data'].attrs['dimensions'] = [
            s.encode('utf-8') for s in dimlabels
        ]

        #Open the hdf5 file (read-only) and copy the data over
        with h5py.File(src, 'r') as sf:

            #Initialize time-remaining printout
            tr = util.timeRemaining(nchan * nshots)

            #Loop through the channels and shots, reading one-by-one into the
            #output dataset
            for chan in range(nchan):
                digi_name = 'RESOURCE ' + str(channel_arr[chan][0])
                chan_name = 'CHANNEL ' + str(channel_arr[chan][1])
                if verbose:
                    print("Reading channel: " + str(chan + 1) + '/' +
                          str(nchan))

                #Resolve the source dataset once per channel, not per shot
                src_dset = sf[digi_name][chan_name][dataname]

                for shot in range(nshots):

                    if verbose:
                        tr.updateTimeRemaining(nshots * chan + shot)
                    #Read the data from the hdf5 file
                    grp['data'][shot, :, chan] = src_dset[shot, ...]

            if has_pos:

                grp.require_dataset('pos', (nshots, 3), np.float32)

                #Positions are converted from the motion unit to cm
                unit_factor = (1.0 * u.Unit(attrs['motion_unit'][0])).to(
                    u.cm).value
                attrs['motion_unit'] = ('cm', '')

                for i, a in enumerate(ax):
                    if pos_chan[a] is None:
                        #No channel for this axis: fill with zeros
                        grp['pos'][:, i] = np.zeros(nshots)
                        continue

                    resname = 'RESOURCE ' + str(pos_chan[a][0])
                    channame = 'CHANNEL ' + str(int(pos_chan[a][1]))

                    try:
                        posdata = sf[resname][channame][
                            'POSITION'][:] * unit_factor
                    except KeyError:
                        print("(!) POSITION Information not found for " +
                              resname)
                        print(
                            "If motion is not included in run, set resource to NA in csv"
                        )
                        #Without position data there is nothing valid to
                        #write for this axis, so propagate the error
                        raise

                    #Handle the case where the multiple data points were
                    #taken at a position so npos!=nshots
                    npos = posdata.size
                    if npos != nshots:
                        posdata = np.repeat(posdata, nshots // npos)

                    grp['pos'][:, i] = posdata

        #Create the axes
        grp.require_dataset('shots', (nshots, ), np.float32,
                            chunks=True)[:] = np.arange(nshots)
        grp['shots'].attrs['unit'] = ''

        grp.require_dataset('chan', (nchan, ), np.float32,
                            chunks=True)[:] = np.arange(nchan)
        grp['chan'].attrs['unit'] = ''

        #Only scope data has a time step from which to build a time axis
        if resource_type == 'SCOPE':
            t = np.arange(nti) * dt
            grp.require_dataset('time', (nti, ), np.float32,
                                chunks=True)[:] = t.value
            grp['time'].attrs['unit'] = str(t.unit)

        #Write the attrs dictionary into attributes of the new data group
        hdftools.writeAttrs(attrs, grp)

    return dest