Example #1
def getwavegrps(infiles, nsamp=None):
    '''
	For a sequence infiles of input WaveformMap files, prepare a mapping
	from transmit-receive pairs to a list of Waveform objects representing
	backscatter waves observed at the pair. If the same WaveformMap key is
	duplicated in multiple input files, the list corresponding to that key
	will contain each Waveform in an order that matches the lexicographical
	ordering of the inputs.
	
	If nsamp is not None, the nsamp property of each Waveform object will
	be overridden.

	Only element indices whose Waveform lists have a length that matches
	that of the longest Waveform list will be included.
	'''
    wavegrps = defaultdict(list)

    for infile in sorted(infiles):
        wmap = WaveformMap.load(infile, dtype='float64')
        if nsamp: wmap.nsamp = nsamp
        for (t, r), wave in wmap.items():
            wavegrps[t, r].append(wave)

    # Filter the list to exclude short lists
    maxlen = max(len(w) for w in wavegrps.values())
    return {k: v for k, v in wavegrps.items() if len(v) == maxlen}
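
A minimal usage sketch for getwavegrps as defined above. The file names and the sample-count override are hypothetical; the call assumes the WaveformMap files exist and come from the same acquisition.

# Hypothetical usage of getwavegrps; the .wmz file names are illustrative
infiles = ['scan-000.wmz', 'scan-001.wmz', 'scan-002.wmz']

# Group backscatter waves by (t, r) pair, overriding nsamp for each Waveform
wavegrps = getwavegrps(infiles, nsamp=4096)

for (t, r), waves in wavegrps.items():
    # Every surviving key holds one Waveform per input file
    print(f'Pair ({t}, {r}): {len(waves)} waveforms')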
Example #2
	parser.add_argument('inputs', type=str, nargs='+',
			help='Input WaveformMap files from which to extract')

	args = parser.parse_args(sys.argv[1:])

	# Try to read all input WaveformMap files
	infiles = matchfiles(args.inputs)

	# Read a defined receive-to-transmit-list map
	if args.trmap: args.trmap = loadkeymat(args.trmap, scalar=False)

	# Clobber the output on the first write; later writes append
	append = False

	for infile in infiles:
		wmap = WaveformMap.load(infile)

		# Build the appropriate subset of the WaveformMap
		if not args.backscatter: wvs = trextract(wmap, args.trmap, args.random)
		else: wvs = ((k, v) for k, v in wmap.items() if k[0] == k[1])
		omap = WaveformMap(wvs)

		if args.output:
			# Save to common output and switch to append mode
			omap.store(args.output, compression=args.compression, append=append)
			append = True
		else:
			output = os.path.splitext(infile)[0] + 'extract.wmz'
			omap.store(output, compression=args.compression, append=False)
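
The backscatter branch above keeps only waveforms whose transmit and receive indices coincide. Below is a minimal sketch of that selection, with a plain dict standing in for a WaveformMap (keys are (transmit, receive) pairs; values are placeholders).

# Toy stand-in for a WaveformMap: keys are (t, r) pairs
wmap = {(0, 0): 'w00', (0, 1): 'w01', (1, 1): 'w11', (2, 1): 'w21'}

# Keep only backscatter entries, i.e., pairs with t == r
wvs = ((k, v) for k, v in wmap.items() if k[0] == k[1])

print(dict(wvs))  # {(0, 0): 'w00', (1, 1): 'w11'}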
Example #3
        # Load the backscatter waves in groups by element
        wavegrps = getwavegrps(args.inputs, args.nsamp)
        if args.atimes and not args.skip_alignment:
            # Shift waveforms if arrival times are provided
            wavegrps = shiftgrps(wavegrps, args.atimes, args.suppress)
            # Strip out the subsequent (realigned) times
            args.atimes = {k: [v[0]] for k, v in args.atimes.items()}
            print('Shifted waveform groups')
        print('Storing waveform video to file', args.output)
        plotframes(args.output, wavegrps, args.atimes, args.window,
                   args.equalize, args.thresh, args.bitrate, args.one_sided)
    else:
        # Load the waveforms
        waves = WaveformMap()
        for inf in args.inputs:
            wm = WaveformMap.load(inf, dtype='float64')
            if args.nsamp: wm.nsamp = args.nsamp
            waves.update(wm)

        # There is no mean arrival time unless arrival times are provided
        mtime = None

        if args.atimes:
            # Find the mean arrival time for all waveforms
            celts = set(waves).intersection(args.atimes)
            print(f'{len(celts)} waveforms have associated arrival times')
            mtime = int(np.mean([args.atimes[c] for c in celts]))

            if args.suppress: print('Will suppress unaligned waveforms')
            elif args.zero: print('Will zero unaligned waveforms')
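
The mean-arrival-time step above only considers keys present in both the waveform map and the arrival-time map. A small sketch with plain dicts in place of the WaveformMap and the parsed atimes (the values are illustrative):

import numpy as np

waves = {(0, 0): 'w00', (1, 1): 'w11', (2, 2): 'w22'}
atimes = {(0, 0): 1250.3, (1, 1): 1248.9, (5, 5): 1301.2}

# Only keys with both a waveform and an arrival time contribute
celts = set(waves).intersection(atimes)
mtime = int(np.mean([atimes[c] for c in celts]))
print(sorted(celts), mtime)  # [(0, 0), (1, 1)] 1249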
Example #4
def calcdelays(datafile, reffile, osamp=1, rank=0, grpsize=1, **kwargs):
    '''
	Given a datafile containing a habis.sigtools.WaveformMap, find arrival
	times using cross-correlation or IMER for waveforms returned by

	  wavegen(data, rank=rank, grpsize=grpsize, **exargs),

	where data is the WaveformMap encoded in datafile and exargs is a
	subset of kwargs as described below.

	For arrival times determined from cross-correlation, a reference
	waveform (as habis.sigtools.Waveform) is read from reffile. For IMER
	arrival times, reffile is ignored.

	The return value is a 2-tuple containing, first, a dictionary that maps
	a (t, r) transmit-receive index pair to a delay in samples and, second,
	a dictionary that maps stat categories to counts of the waveforms that
	fall into each category.

	Optional keyword arguments include:

	* flipref: A Boolean (default: False) that, when True, causes the
	  reference waveform to be negated when read.

	* nsamp: Override data.nsamp. Useful mainly for bandpass filtering.

	* negcorr: A Boolean (default: False) passed to Waveform.delay as the
	  'negcorr' argument to consider negative cross-correlation.

	* signsquare: Square the waveform and reference amplitudes (multiplying
	  each signal by its absolute value to preserve signs) to better
	  emphasize peaks in the cross-correlation. The squaring is done right
	  after any bandpass filtering, so other parameters that are influenced
	  by amplitude (e.g., minsnr, thresholds in peaks) should be altered to
	  account for the squared amplitudes.

	* minsnr: A sequence (mindb, noisewin) used to define the minimum
	  acceptable SNR in dB (mindb) by comparing the peak signal amplitude
	  to the minimum standard deviation over a sliding window of width
	  noisewin. SNR for each signal is calculated after application of an
	  optional window. Delays will not be calculated for signals that fail
	  to exceed the minimum threshold.

	* denoise: If not None, a dictionary suitable for passing as keyword
	  arguments (**denoise) to Waveform.denoise to use CFAR rejection of
	  the Gabor spectrogram to isolate the signal. Denoising is done after
	  minimum-SNR rejection to avoid too many false matches with
	  very-low-noise signals.

	* peaks: A dictionary suitable for passing as keyword arguments
	  (**peaks) to the isolatepeak function, excluding the first three
	  arguments.

	  *** NOTE: peak windowing is done after overall windowing and after
	  possible exclusion by minsnr. ***

	* delaycache: A map from transmit-receive element pairs (t, r) to a
	  precomputed delay d. If a value exists for a given pair (t, r) in the
	  WaveformMap and the delay cache, the precomputed value will be used
	  in favor of explicit computation.

	* queue: If not None, the return values are passed as an argument to
	  queue.put().

	* eleak: If not None, a floating-point value in the range [0, 1) that
	  specifies the maximum permissible fraction of the total signal energy
	  that may arrive before identified arrival times. Any waveform for
	  which the fraction of total energy arriving before the arrival time
	  exceeds eleak will be rejected as unacceptable.

	  Estimates of energy leaks ignore any fractional parts of arrival
	  times. Energy leaks are estimated after any bandpass filtering or
	  windowing. Estimates never consider peak isolation.

	* imer: A dictionary to provide all but the first argument of
	  getimertime. If this is provided, getimertime will be used instead of
	  (optional) peak isolation and cross-correlation to determine an
	  arrival time.

	* elements: If not None, an N-by-3 array or a map from element indices
	  to coordinates. If wavegen returns a neighborhood of more than one
	  transmit-receive pair for any arrival time, the element coordinates
	  will be used to find an optimal (in the least-squares sense) slowness
	  to predict arrivals observed in the neighborhood.

	  If an arrival-time measurement for the "key" pair in a measurement
	  neighborhood is available and the average slowness imputed by this
	  arrival time falls within 1.5 IQR of the average slowness values for
	  all pairs in the neighborhood, or if the neighborhood consists of
	  only the key measurement pair, the arrival time for the "key" pair is
	  used without modification.

	  If the arrival time for a key pair is missing from the neighborhood,
	  or falls outside of 1.5 IQR, the arrival time for the key pair will
	  be the optimum slowness value for the neighborhood multiplied by the
	  propagation distance for the pair.

	  Element coordinates are required if wavegen returns neighborhoods of
	  more than one member.

	Any unspecified keyword arguments are passed to wavegen.
	'''
    # Read the data and reference
    data = WaveformMap.load(datafile)

    # Pull a copy of the IMER configuration, if it exists
    imer = dict(kwargs.pop('imer', ()))

    # Read the reference if IMER times are not desired
    if not imer:
        if reffile is None: raise ValueError('Must specify reffile or imer')
        ref = Waveform.fromfile(reffile)
    else:
        ref = None
    # Negate the reference, if appropriate
    if kwargs.pop('flipref', False) and ref is not None: ref = -ref

    # Unpack the signsquare argument and flip the reference if necessary
    signsquare = kwargs.pop('signsquare', False)
    if signsquare and ref is not None: ref = ref.signsquare()

    # Override the sample count, if desired
    try:
        nsamp = kwargs.pop('nsamp')
    except KeyError:
        pass
    else:
        data.nsamp = nsamp

    # Determine if an energy "leak" threshold is desired
    try:
        eleak = float(kwargs.pop('eleak'))
    except KeyError:
        eleak = None
    else:
        if not 0 <= eleak < 1:
            raise ValueError('Argument eleak must be in range [0, 1)')

    # Unpack minimum SNR requirements
    minsnr, noisewin = kwargs.pop('minsnr', (None, None))

    # Pull the optional peak search criteria
    peaks = dict(kwargs.pop('peaks', ()))

    # Pull the optional denoising criteria
    denoise = dict(kwargs.pop('denoise', ()))

    # Determine whether to allow negative correlations
    negcorr = kwargs.pop('negcorr', False)

    # Grab an optional delay cache
    delaycache = kwargs.pop('delaycache', {})

    # Grab an optional result queue
    queue = kwargs.pop('queue', None)

    # Element coordinates, if required
    elements = kwargs.pop('elements', None)

    # Pre-populate cached values
    result = {k: delaycache[k] for k in set(data).intersection(delaycache)}
    # Remove the cached waveforms from the set
    for k in result:
        data.pop(k, None)
    # Only keep a local portion of cached values
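    # (cached keys are sorted, then split round-robin so each of the
    # grpsize ranks in the process group keeps a disjoint share)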
    result = {k: result[k] for k in sorted(result)[rank::grpsize]}

    wavestats = defaultdict(int)
    wavestats['cached'] = len(result)

    grpdelays = defaultdict(dict)

    # Process waveforms (possibly averages) as generated
    for key, sig, nbrs in wavegen(data, rank=rank, grpsize=grpsize, **kwargs):
        # Square the signal if desired
        if signsquare: sig = sig.signsquare()

        if minsnr is not None and noisewin is not None:
            if sig.snr(noisewin) < minsnr:
                wavestats['low-snr'] += 1
                continue

        if denoise: sig = sig.denoise(**denoise)

        # Calculate cumulative energy in unwindowed waveform
        if eleak: cenergy = np.cumsum(sig.data**2)

        if imer:
            # Compute IMER time
            try:
                dl = getimertime(sig, osamp=osamp, **imer)
            except IndexError:
                wavestats['failed-IMER'] += 1
                continue
        else:
            if peaks:
                try:
                    sig = isolatepeak(sig, key, **peaks)
                except ValueError:
                    wavestats['missing-peak'] += 1
                    continue

            # Compute and record the delay
            dl = sig.delay(ref, osamp=osamp, negcorr=negcorr)
            if negcorr:
                if dl[1] < 0: wavestats['negative-correlated'] += 1
                dl = dl[0]

        if eleak:
            # Evaluate leaked energy
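            # (shift the whole-sample arrival time into the data window so
            # cenergy[ssamp] is the energy accrued before the arrival)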
            ssamp = int(dl) - sig.datawin.start - 1
            if not 0 <= ssamp < len(cenergy):
                wavestats['out-of-bounds'] += 1
                continue
            elif cenergy[ssamp] >= eleak * cenergy[-1]:
                wavestats['leaky'] += 1
                continue

        if len(nbrs) < 2:
            # If the element is its own neighborhood, just copy result
            if key in nbrs:
                wavestats['sole-valid'] += 1
                result[key] = dl
            else:
                wavestats['invalid-neighborhood'] += 1
        else:
            # Results will be optimized from groups of delays
            for nbr in nbrs:
                grpdelays[nbr][key] = dl

    if grpdelays and elements is None:
        raise TypeError('Cannot have neighborhoods when elements is None')

    for key, grp in grpdelays.items():
        if key[0] == key[1] or any(t == r for t, r in grp):
            raise ValueError('Backscatter neighborhoods not supported')

        pdist, slw = {}, {}
        try:
            # Find distances and speeds for neighborhoods
            for (t, r), dl in grp.items():
                v = norm(elements[t] - elements[r])
                pdist[t, r] = v
                slw[t, r] = dl / v
        except (KeyError, IndexError):
            # Coordinates or a delay are unavailable for some pair in the group
            wavestats['unknown-pair'] += 1
            continue

        # Eliminate outliers based on slowness; discard slowness values
        slw = set(stats.mask_outliers(slw))

        if key in slw:
            result[key] = grp[key]
            wavestats['valid-in-neighborhood'] += 1
        else:
            wavestats['outlier'] += 1

    try:
        queue.put((result, wavestats))
    except AttributeError:
        pass

    return result, wavestats
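
A hedged usage sketch for calcdelays as defined above. The file names, oversampling factor, and keyword choices are hypothetical placeholders; wavegen, the WaveformMap files, and the optional arguments are assumed to behave as described in the docstring.

# Hypothetical invocation; 'measured.wmz' and 'reference.wfm' are placeholders
delays, counts = calcdelays('measured.wmz', 'reference.wfm', osamp=8,
                            rank=0, grpsize=1, nsamp=8192,
                            minsnr=(5, 100), negcorr=True)

print(f'{len(delays)} arrival times computed')
for stat, n in sorted(counts.items()):
    print(f'  {stat}: {n} waveforms')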