示例#1
0
def makesendbufs(wmap, destmap, nmax=10000):
    '''
    Given a WaveformMap wmap and a destmap, as produced by keyroute, that
    maps ranks in an MPI communicator to sets of keys in wmap that should
    be sent to that rank, prepare and return a map from destination ranks
    to a list of BytesIO buffers that each hold a serialized representation
    of a subset (of at most nmax Waveforms) of wmap to be sent to that rank.

    Keys listed in destmap that are not present in wmap are silently
    ignored. A rank for which no keys remain does not appear in the output.

    The values of destmap must provide an intersection() method (e.g.,
    sets). The return value is a defaultdict(list).

    A ValueError is raised if nmax is less than 1, since no chunking of
    the keys into groups of nmax would ever terminate.
    '''
    if nmax < 1:
        raise ValueError('Argument nmax must be a positive integer')

    # Assign the buffers to target ranks
    buffers = defaultdict(list)

    for rank, rkeys in destmap.items():
        # Only keys actually held in wmap can be serialized
        keys = list(rkeys.intersection(wmap))

        # Walk the key list in chunks of at most nmax keys
        for first in range(0, len(keys), nmax):
            # Build a submap to serialize
            chunk = keys[first:first + nmax]
            rmap = WaveformMap((k, wmap[k]) for k in chunk)
            # Serialize to a BytesIO stream
            bstr = io.BytesIO()
            rmap.store(bstr)
            # Append the buffer to the map
            buffers[rank].append(bstr)

    return buffers
示例#2
0
def fhfft(infile, outfile, groupmap, **kwargs):
    '''
    For a real WaveformSet file infile, perform Hadamard decoding and then
    a DFT of the temporal samples. The Hadamard decoding follows the
    grouping configuration stored in groupmap, a map

        (element index) -> (local Hadamard index, group number)

    that defines Hadamard groups and must agree with the local group
    configuration represented in the input. The resulting transformed
    records will be stored in the output outfile. The nature of outfile
    depends on the optional argument trmap (see below).

    If trmap is not provided (or is empty), all records will be written as
    a binary blob; the outfile should be a single string providing the
    location of the output. The output will have shape Ns x Nt x Nr, where
    Ns is the number of output samples per waveform (as governed by the
    spectral or temporal windows applied), Nt is the number of input
    transmit channels, and Nr is the number of input receive channels. The
    blob output is created or truncated by the first writer.

    If trmap is provided, outfile should be a one-to-one map from the keys
    of trmap to output files. A WaveformMap object will be created for each
    key in trmap and receive channel, and will be stored in append mode at
    the location indicated by the corresponding value in outfile.

    Any TGC parameters in the input, accessible as wset.context['tgc'],
    will be used to adjust the amplitudes of the waveforms. (The same
    multiplier is applied to every transmit row.)

    The kwargs contain optional values or default overrides:

    * freqs (default: None): When not None, a sequence (start, end)
      to be passed as slice(start, end) to bandpass filter the input after
      Hadamard decoding.

    * rolloff (default: None): When not None, an integer that defines the
      half-width of a Hann window that rolls off the bandpass filter
      specified in freqs.

      ** NOTE: The rolloff is only validated against the bandwidth when
      freqs is provided; the Hann tails are applied at the edges of the
      spectral window whenever a DFT is performed and rolloff is nonzero.

    * nsamp (default: None): The length of the time window over which
      waveforms are considered (and DFTs are performed), starting from
      global time 0 (i.e., without consideration for input F2C). If None,
      the value of nsamp in the input is used.

      ** NOTE: Because the time window always starts at global time 0,
      a waveform with a data window (start, length) will be cropped when
      (f2c + start + length) > nsamp, even if nsamp is the value encoded in
      the file.

    * tgcsamps (default: 16 [for integer datatypes] or 0 [else]): The
      number of temporal samples to which a single TGC parameter applies.
      Signals will be scaled by an appropriate section of the multiplier

        mpy = (invtgc[:,np.newaxis] *
            np.ones((ntgc, tgcsamps), dtype=np.float32)).ravel('C'),

      where the values invtgc = 10.**(-wset.context['tgc'] / 20.) and
      ntgc = len(wset.context['tgc']). The multiplier mpy is defined over a
      window that starts at file sample 0 (global time wset.f2c).

      Set tgcsamps to 0 to disable compensation; a value of None selects
      the datatype-dependent default. If the WaveformSet includes TGC
      parameters and tgcsamps is a positive integer, then len(mpy) must be
      at least long enough to encompass all data windows encoded in the
      file.

    * tgcmap (default: None): If provided, should be a two-column, rank-2
      Numpy array (or compatible sequence) that relates nominal gains in
      column 0 to actual gains in column 1. The rows of the array will be
      sorted and used as control points in a piecewise linear
      interpolation (using numpy.interp) that will map TGC parameters
      specified in the WaveformSet file to actual gains. In other words,
      the TGC values described above will be replaced with

        tgc = np.interp(tgc, tgcmap[:,0], tgcmap[:,1])

      whenever tgcmap is provided.

    * tdout (default: False): Set to True to output time-domain waveforms
      rather than spectral samples. Preserves input acquisition windows.

    * signs (default: None): When not None, should be a sequence, with
      length equal to the Hadamard group size wset.txgrps[1], that
      specifies a 1 for any local Hadamard index (corresponding to lines in
      the file) that should be negated, and 0 anywhere else. Ignored when
      an FHT is not performed.

    * trmap (default: None): If provided, must be a map from a label
      (referencing an output location in the map outfile) to a map from
      receive indices to lists of transmit indices that, together, identify
      transmit-receive pairs to extract from the input.

    * start (default: 0) and stride (default: 1): For an input WaveformSet
      wset, process receive channels in wset.rxidx[start::stride].

    * lock (default: None): If not None, it should be a context manager
      that is invoked to serialize writes to output. If None, a new
      multiprocessing.Lock is used.

    * event (default: None): Only used when trmap is not provided. If not
      None, event.set() and event.wait() are called to ensure the output
      header is written to the binary-blob output before records are
      appended. The value event.is_set() should be False prior to
      execution. If None, a new multiprocessing.Event is used.

    A TypeError is raised for any unrecognized keyword argument. A
    ValueError is raised if the rolloff is incompatible with freqs, if the
    input lacks a valid Tx-group configuration, if the Hadamard length is
    not a positive power of 2, if signs has the wrong shape, if any FHT
    group in the input is incomplete or has improper local indices, or if
    the TGC curve fails to encompass the data window of some channel.
    '''
    # Override acquisition window, if desired
    nsamp = kwargs.pop('nsamp', None)

    # Grab synchronization mechanisms; None (explicit or by omission)
    # means that private defaults should be created later
    lock = kwargs.pop('lock', None)
    event = kwargs.pop('event', None)

    # Grab FFT and FHT switches and options
    tdout = kwargs.pop('tdout', False)
    freqs = kwargs.pop('freqs', None)
    rolloff = kwargs.pop('rolloff', None)
    # Bandpass filtering needs a DFT even when the output is time-domain
    dofft = (freqs is not None) or not tdout

    if freqs is not None:
        flo, fhi = freqs
        if rolloff and not 0 < rolloff < (fhi - flo) // 2:
            raise ValueError(
                'Rolloff must be None or less than half bandwidth')

    # Grab striding information
    start = kwargs.pop('start', 0)
    stride = kwargs.pop('stride', 1)

    # Grab sign map information
    signs = kwargs.pop('signs', None)

    # Grab the number of samples per TGC value and an optional gain map
    tgcsamps = kwargs.pop('tgcsamps', None)
    tgcmap = kwargs.pop('tgcmap', None)

    trmap = kwargs.pop('trmap', None)

    if kwargs:
        raise TypeError(f"Unrecognized keyword '{next(iter(kwargs))}'")

    # Create default synchronization primitives only after the arguments
    # have been validated, so bad calls fail before allocating anything
    if lock is None:
        lock = multiprocessing.Lock()
    if event is None:
        event = multiprocessing.Event()

    # Open the input
    wset = WaveformSet.load(infile)

    # Pull default sample count from input file, or else make sure the
    # input is at least as long as the requested window
    if nsamp is None:
        nsamp = wset.nsamp
    elif wset.nsamp < nsamp:
        wset.nsamp = nsamp

    # Handle TGC compensation if necessary
    try:
        tgc = np.asarray(wset.context['tgc'], dtype=np.float32)
    except (KeyError, AttributeError):
        tgc = np.array([], dtype=np.float32)

    if tgcmap is not None:
        # Make sure that the TGC map is sorted and interpolate
        tgx, tgy = zip(*sorted((k, v) for k, v in tgcmap))
        # TGC curves are always float32, regardless of tgcmap types
        tgc = np.interp(tgc, tgx, tgy).astype(np.float32)

    # Pick a suitable default value for tgcsamps
    if tgcsamps is None:
        tgcsamps = 16 if np.issubdtype(wset.dtype, np.integer) else 0

    # Linearize, invert, and expand the TGC curves
    # (the result is empty when tgcsamps is 0 or there are no parameters)
    tgc = ((10.**(-tgc[:, np.newaxis] / 20.) * np.ones(
        (len(tgc), tgcsamps), dtype=np.float32))).ravel('C')

    # Figure out the data type of compensated waveforms
    if len(tgc):
        itype = np.dtype(wset.dtype.type(0) * tgc.dtype.type(0))
    else:
        itype = wset.dtype

    # Make sure that the data type is always floating-point
    if not np.issubdtype(itype, np.floating):
        itype = np.dtype('float64')

    # Pick the spectral type matching itype and the type of the output
    ftype = _r2c_datatype(itype)
    otype = ftype if not tdout else itype

    # Make sure the WaveformSet has a local configuration
    try:
        gcount, gsize = wset.txgrps
    except TypeError as err:
        raise ValueError('A valid Tx-group configuration is required') from err

    # A power of 2 has exactly one bit set, so gsize & (gsize - 1) is 0
    if gsize < 1 or (gsize & (gsize - 1)):
        raise ValueError('Hadamard length must be a positive power of 2')

    # Validate local portion of the group map and assign
    wset.groupmap = groupmap

    if signs is not None:
        # Convert 0 / 1 flags to +1 / -1 multipliers in the right type
        signs = np.asarray([1 - 2 * s for s in signs], dtype=itype)
        if signs.ndim != 1 or len(signs) != gsize:
            msg = f'Sign list must have shape ({wset.txgrps[1]},)'
            raise ValueError(msg)

    # Identify all FHTs represented by stored transmission indices
    fhts = {}
    for i in wset.txidx:
        g, loc = i // gsize, i % gsize
        fhts.setdefault(g, []).append(loc)

    # Verify that all FHTs are complete
    for g, ll in fhts.items():
        if len(ll) != gsize:
            raise ValueError(f'FHT group {g} is incomplete')
        if any(i != j for i, j in enumerate(sorted(ll))):
            raise ValueError(f'FHT group {g} has improper local indices')

    def gidx(loc, g):
        # Convert a (local index, group) pair to a transmission index
        return g * gsize + loc

    # Map each FHT group to a list of row indices for the FHT
    # and each element corresponding to an FHT output to row indices
    fhts = {g: [wset.tx2row(gidx(loc, g)) for loc in range(gsize)]
            for g in fhts}
    invgroups = {(loc, g): i for i, (loc, g) in wset.groupmap.items()}
    el2row = {
        invgroups[loc, g]: wset.tx2row(gidx(loc, g))
        for g in fhts for loc in range(gsize)
    }

    # Create intermediate (FHT) and output (FHFFT) arrays
    # FFT axis is contiguous for FFT performance
    b = pyfftw.empty_aligned((wset.ntx, nsamp), dtype=itype, order='C')

    if dofft:
        # Create FFT output and a plan
        cdim = (wset.ntx, nsamp // 2 + 1)
        c = pyfftw.empty_aligned(cdim, dtype=ftype, order='C')
        fwdfft = pyfftw.FFTW(b, c, axes=(1, ), direction='FFTW_FORWARD')

        # Create an inverse FFT plan for time-domain output
        if tdout:
            invfft = pyfftw.FFTW(c, b, axes=(1, ), direction='FFTW_BACKWARD')

        # Find the spectral window of interest
        fswin = specwin(cdim[1], freqs)

        # Try to build bandpass tails
        if rolloff:
            tails = np.hanning(2 * int(rolloff))
        else:
            tails = np.array([])

    if trmap:
        # Identify the subset of receive channels needed
        allrx = reduce(set.union, (trm.keys() for trm in trmap.values()),
                       set())
        rxneeded = sorted(allrx.intersection(wset.rxidx))[start::stride]
    else:
        rxneeded = wset.rxidx[start::stride]

        # In blob mode, the first write must create a header
        with lock:
            if not event.is_set():
                # Create a sliced binary matrix output
                # (fswin always exists here: not tdout implies dofft)
                windim = (nsamp if tdout else fswin.length, wset.ntx, wset.nrx)
                mio.Slicer(outfile, dtype=otype, trunc=True, dim=windim)
                event.set()

        # Ensure the output header has been written
        event.wait()

        # Map receive channels to rows (slabs) in the output
        rx2slab = {i: j for j, i in enumerate(sorted(wset.rxidx))}
        # Map transmit channels to decoded FHT rows
        outrows = [r for _, r in sorted(el2row.items())]

        outbin = mio.Slicer(outfile)

    for rxc in rxneeded:
        # Find the input window relative to 0 f2c
        iwin = wset.getheader(rxc).win.shift(wset.f2c)
        owin = (0, nsamp)

        try:
            # Find overlap of global input and output windows
            ostart, istart, dlength = cutil.overlap(owin, iwin)
        except TypeError:
            # Default to 0-length windows at start of acquisition
            iwin = Window(0, 0, nonneg=True)
            owin = Window(0, 0, nonneg=True)
        else:
            # Convert input and output windows from global f2c to file f2c
            iwin = Window(istart, dlength, nonneg=True)
            owin = Window(ostart, dlength, nonneg=True)

        # Read the data over the input window
        data = wset.getrecord(rxc, window=iwin)[1]

        # Clear the data array
        b[:, :] = 0.
        ws, we = owin.start, owin.end

        if iwin.length and gsize > 1:
            # Perform grouped Hadamard transforms with optional sign flips
            for rows in fhts.values():
                # Ensure FHT axis is contiguous for performance
                dblk = np.asfortranarray(data[rows, :])
                b[rows, ws:we] = fwht(dblk, axes=0) / gsize
                if signs is not None:
                    b[rows, ws:we] *= signs[:, np.newaxis]
        else:
            # No decoding needed (or no data); just copy the input
            b[:, ws:we] = data

        # Time-gain compensation, if necessary
        if len(tgc) and iwin.length:
            twin = (0, len(tgc))
            try:
                tstart, _, tlength = cutil.overlap(twin, iwin)
                # The TGC curve must cover the entire data window
                if tlength != iwin.length:
                    raise ValueError
            except (TypeError, ValueError):
                raise ValueError(
                    f'TGC curve does not encompass data for channel {rxc}')
            b[:, ws:we] *= tgc[np.newaxis, tstart:tstart + tlength]

        if dofft:
            fwdfft()

            # Suppress content out of the band
            c[:, :fswin.start] = 0.
            c[:, fswin.end:] = 0.

            # Bandpass filter the spectral samples
            if len(tails) > 0:
                # The rising half tapers the low edge, the falling half
                # tapers the high edge
                ltails = len(tails) // 2
                c[:, fswin.start:fswin.start +
                  ltails] *= tails[np.newaxis, :ltails]
                c[:, fswin.end - ltails:fswin.end] *= tails[np.newaxis,
                                                            -ltails:]

            # Revert to time-domain representation if necessary
            if tdout:
                invfft()

        if not trmap:
            # Write the binary blob for this receive channel
            orow = rx2slab[rxc]
            with lock:
                if tdout:
                    outbin[orow] = b[outrows, :].T
                else:
                    outbin[orow] = c[outrows, fswin.start:fswin.end].T
            # Nothing more to do in blob mode
            continue

        # Slice desired range from output data
        if tdout:
            dblock = b[:, ws:we]
            dstart = ws
        else:
            dblock = c[:, fswin.start:fswin.end]
            dstart = fswin.start

        for label, trm in trmap.items():
            # Pull tx list for this tier and rx channel, if possible
            try:
                tl = trm[rxc]
            except KeyError:
                tl = []

            if not len(tl):
                continue

            # Collect all transmissions for this rx channel
            wmap = WaveformMap()
            for t in tl:
                # Make sure transmission is represented in output
                try:
                    row = el2row[t]
                except KeyError:
                    continue

                wave = Waveform(nsamp, dblock[row], dstart)
                wmap[t, rxc] = wave

            # Flush the waveform map to disk
            with lock:
                wmap.store(outfile[label], append=True)
示例#3
0
    # Process the messages, adding waveforms to the local map
    printroot(grank, 'Collecting incoming waveforms...')
    wmap.update(procmessages(sendreqs, recvreqs, recvbufs))
    printroot(grank, f'Final size of local map at rank {grank} is {len(wmap)}')

    gnsize = MPI.COMM_WORLD.reduce(len(wmap))
    printroot(grank, f'{gnsize} waveforms scattered globally')

    # Build an output map
    omap = WaveformMap()
    while wmap:
        (t, r), left = wmap.popitem()
        try:
            right = wmap.pop((r, t))
        except KeyError:
            continue
        omap[min(t, r), max(t, r)] = pairavg(left, right, args.osamp,
                                             args.clip)

    gosize = MPI.COMM_WORLD.reduce(len(omap))
    printroot(grank, f'{gosize} reciprocal pairs averaged globally')

    # Write the output, serializing within local communicators
    for i in range(lsize):
        if i == lrank: omap.store(args.output, append=i)
        lcomm.Barrier()

    printroot(grank, 'End of control')
    MPI.COMM_WORLD.Barrier()
示例#4
0
	parser.add_argument('inputs', type=str, nargs='+',
			help='Input WaveformMap files from which to extract')

	args = parser.parse_args(sys.argv[1:])

	# Try to read all input WaveformMap files
	infiles = matchfiles(args.inputs)

	# Read a defined receive-to-transmit-list map
	if args.trmap: args.trmap = loadkeymat(args.trmap, scalar=False)

	# At first, clobber the output
	append = False

	for infile in infiles:
		wmap = WaveformMap.load(infile)

		# Build the appropriate subset of the WaveformMap
		if not args.backscatter: wvs = trextract(wmap, args.trmap, args.random)
		else: wvs = ((k, v) for k, v in wmap.items() if k[0] == k[1])
		omap = WaveformMap(wvs)

		if args.output:
			# Save to common output and switch to append mode
			omap.store(args.output, compression=args.compression, append=append)
			append = True
		else:
			output = os.path.splitext(infile)[0] + 'extract.wmz'
			omap.store(output, compression=args.compression, append=False)