Example #1
def rf_quality_metrics_queue(oqueue,
                             station_id,
                             station_stream3c,
                             similarity_eps,
                             drop_z=True):
    """Produce RF quality metrics in a stream and queue the QC'd components for
    downstream processing.

    :param oqueue: Output queue where filtered streams are queued
    :type oqueue: queue or multiprocessing.Manager.Queue
    :param station_id: Station ID
    :type station_id: str
    :param station_stream3c: 3-channel stream
    :type station_stream3c: list(rf.RFStream) with 3 components
    :param similarity_eps: Distance threshold used for DBSCAN clustering
    :type similarity_eps: float
    :param drop_z: If True, exclude the Z component from the queued stream, defaults to True
    :type drop_z: bool, optional
    """
    streams_qual = compute_rf_quality_metrics(station_id, station_stream3c,
                                              similarity_eps)
    if streams_qual is not None:
        z_stream, p_stream, t_stream = streams_qual
        if drop_z:
            stream_qual = rf.RFStream(
                [tr for doublet in zip(p_stream, t_stream) for tr in doublet])
        else:
            stream_qual = rf.RFStream([
                tr for triplet in zip(z_stream, p_stream, t_stream)
                for tr in triplet
            ])
        # end if
        oqueue.put(stream_qual)
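A minimal usage sketch for the function above (hypothetical file and station names; compute_rf_quality_metrics is Example #13 below, and rf_util.read_h5_rf and rf.IterMultipleComponents are the helpers used in the other examples here):

import multiprocessing

data = rf_util.read_h5_rf('OA_rfs.h5')  # hypothetical input file
station_stream3c = list(rf.IterMultipleComponents(
    data.select(station='BT23'), 'onset', number_components=3))
oqueue = multiprocessing.Manager().Queue()
rf_quality_metrics_queue(oqueue, 'OA.BT23', station_stream3c,
                         similarity_eps=2.0)  # eps value is illustrative
while not oqueue.empty():
    qcd_stream = oqueue.get()  # RFStream with quality metrics populated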
Example #2
def get_eq_waveforms(station_list, sta_inv, eq_cat, filt_kws=None, **kwargs):
    """Retrieve earthquake waveforms for each station and attach rf statistics.

    :param station_list: list of stations to retrieve data for
    :param sta_inv: obspy Inventory object containing station information
    :param eq_cat: obspy Catalog object containing the earthquake catalog
    :param filt_kws: dict object containing parameters for an obspy filter,
        passed through to read_passive
    :param kwargs: additional keyword arguments passed through to read_passive;
        a 'phase_list' entry also selects the phase used for rf.rfstats
    :return: rf.RFStream of 3-component waveforms with rfstats attached, or
        None if no waveforms were found
    """
    # Determine the phase once up front; default to 'P' if no phase_list given.
    phase = kwargs['phase_list'][0] if 'phase_list' in kwargs else 'P'
    ev_cnt = 0
    eq_stream = rf.RFStream()
    for stat in station_list:
        op_stat = get_station_info(stat, sta_inv, xml=True)
        stat_dict = get_station_info(stat, sta_inv)
        for event in eq_cat:
            cat_id = event.resource_id
            test_id = str(cat_id).split("=")[1].split("&")[0]
            print("\nSearching for event {0} at station {1}".format(test_id, stat))
            waveforms = read_passive(op_stat, event, filt_kws=filt_kws, **kwargs)
            if waveforms is None:
                continue
            if len(waveforms) != 3:
                print("Imported stream does not have 3 traces ({0})... skipping event".format(len(waveforms)))
                continue
            stats = rf.rfstats(station=stat_dict, event=event, phase=phase, dist_range=(30, 90))
            if stats is None:
                print("No rfstats calculated... skipping event")
                continue
            temp_stream = rf.RFStream(waveforms)
            for tr in temp_stream:
                tr.stats.update(stats)
            eq_stream.extend(temp_stream)
            ev_cnt += 1

    if ev_cnt != 0:
        return eq_stream
    print("No earthquake waveforms found...")
    return None
Example #3
def _compute_rf(data, config, log):
    st = rf.RFStream()
    event_ids = config.get("event_ids")
    src_file = config.get("waveform_file")
    if event_ids is None:
        log.error("Unable to generate RF without event IDs")
        return st
    # end if
    if src_file is None:
        log.error("Unable to generate RF without path to source file")
        return st
    # end if
    if not os.path.isfile(src_file):
        log.error(
            "Source file {} for trace metadata not found, cannot generate RF".
            format(src_file))
        return st
    # end if
    net, sta, loc = config["station_id"].split('.')
    src_waveforms = read_h5_stream(src_file, net, sta, loc)
    assert data.shape[0] == len(event_ids)
    for i, event_data in enumerate(data):
        evid = event_ids[i]
        src_stream = rf.RFStream(
            [tr for tr in src_waveforms if tr.stats.event_id == evid])
        # Z component
        z_header = src_stream.select(component='Z')[0].stats
        su_opts = config["su_energy_opts"]
        z_header.starttime = z_header.onset + su_opts["time_window"][0]
        z_header.sampling_rate = su_opts["sampling_rate"]
        z_header.delta = 1.0 / z_header.sampling_rate
        z_header.npts = event_data.shape[1]
        assert np.isclose(
            float(z_header.endtime - z_header.starttime),
            su_opts["time_window"][1] - su_opts["time_window"][0])
        tr = rf.rfstream.RFTrace(event_data[1, :], header=z_header)
        st += tr
        # R component
        r_header = z_header.copy()
        r_header.channel = z_header.channel[:-1] + 'R'
        tr = rf.rfstream.RFTrace(event_data[0, :], header=r_header)
        st += tr
    # end for

    st.filter('bandpass', freqmin=0.05, freqmax=1.0, corners=2, zerophase=True)
    normalize = 0  # Use Z-component for normalization
    st.rf(rotate=None,
          method='P',
          deconvolve='func',
          func=rf_iter_deconv,
          normalize=normalize,
          min_fit_threshold=75.0)

    return st
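The shape of the config dict consumed by _compute_rf can be inferred from the keys it reads; a hypothetical illustration (all values are placeholders):

config = {
    "station_id": "NET.STA.LOC",             # split into net, sta, loc
    "event_ids": ["event_1", "event_2"],     # one ID per row of `data`
    "waveform_file": "source_waveforms.h5",  # read via read_h5_stream
    "su_energy_opts": {
        "time_window": (-10.0, 30.0),        # seconds relative to onset
        "sampling_rate": 10.0,               # Hz
    },
}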
Example #4
def remove_small_s2n(stream, ratio):
    """Remove traces whose RMS signal/noise amplitude ratio is below the given
    ratio, or whose epicentral distance is 35 degrees or less.
    """
    noise = stream.slice2(-5, -2, 'onset')
    signal = stream.slice2(-1, 2, 'onset')
    newstream = rf.RFStream()
    for i in range(len(stream)):
        # RMS amplitude of the signal window divided by that of the noise window
        rms = np.sqrt(np.mean(np.square(signal[i].data))) / np.sqrt(
            np.mean(np.square(noise[i].data)))
        if rms > ratio and stream[i].stats.distance > 35.:
            newstream.append(stream[i])
    return newstream
Example #5
def filter_crosscorr_coeff(rf_stream, time_window=(-2, 25), threshold_cc=0.70, min_fraction=0.15, apply_moveout=False):
    """For each trace in the stream, compute its correlation coefficient with the other traces.
    Return only traces matching cross-correlation coefficient criteria based on C. Sippl (2016)
    [see http://dx.doi.org/10.1016/j.tecto.2016.03.031]

    :param rf_stream: Stream of RF traces to filter, should be **for a single component of a single station**
    :type rf_stream: rf.RFStream
    :param time_window: Time window to filter by, defaults to (-2, 25)
    :type time_window: tuple, optional
    :param threshold_cc: Threshold cross-correlation coefficient, defaults to 0.70.
        Denoted Xi in Sippl, who used value 0.80.
    :type threshold_cc: float, optional
    :param min_fraction: Minimum fraction of coefficients above threshold_cc, defaults to 0.15.
        Denoted tau in Sippl, who used value 0.15.
    :type min_fraction: float, optional
    :param apply_moveout: Whether to apply moveout correction to Ps phase prior to computing
        correlation coefficients.
    :type apply_moveout: bool
    :return: Filtered stream of RF traces
    :rtype: rf.RFStream
    """
    assert_homogenous_stream(rf_stream, filter_crosscorr_coeff.__name__)

    # Early exit if we don't have enough traces for similarity filtering to be meaningful.
    if len(rf_stream) < 3:
        return rf_stream
    # end if

    # Trim good RFs to time range so that subsequent cross-correlation computations relate to the
    # relevant period around and after onset.
    data_cc = rf_stream.copy().trim2(*time_window, reftime='onset')
    if not data_cc:
        return data_cc
    # end if

    # Apply optional moveout
    if apply_moveout:
        data_cc.moveout()
    # end if
    # Gather all RFs into a single array for efficient computation of correlation coefficients
    # between all traces
    data_array = np.array([tr.data for tr in data_cc])
    # Compute cross-correlation coefficients. The cc matrix will be symmetric.
    # Each row of cc gives the correlation of one trace with every other trace.
    cc = np.corrcoef(data_array)
    # Determine mask of which traces meet the similarity filtering criteria
    fraction_above_threshold = np.sum(cc >= threshold_cc, axis=1)/len(data_cc)
    keep_trace_mask = (fraction_above_threshold >= min_fraction)
    kept_data = rf.RFStream([tr for i, tr in enumerate(rf_stream) if keep_trace_mask[i]])
    return kept_data
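A self-contained numerical sketch of the masking logic above on toy data (the keep threshold is stricter than the 0.15 default because the defaults are tuned for much larger trace populations):

import numpy as np

np.random.seed(0)
t = np.linspace(0, 1, 200)
# Three mutually similar traces plus one incoherent (noise) trace
traces = np.array([np.sin(10 * t), np.sin(10 * t + 0.1),
                   np.sin(10 * t - 0.1), np.random.randn(200)])
cc = np.corrcoef(traces)
fraction_above = np.sum(cc >= 0.70, axis=1) / len(traces)
keep = fraction_above >= 0.5
# keep -> [True, True, True, False]: the noise trace correlates highly only with itself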
Example #6
def calculate_rfs(eq_stream, filt_kw=None, deconvolve='time', moveout=True, savefile=None, **kwargs):
    """Compute receiver functions from 3-component earthquake waveforms.

    :param eq_stream: obspy stream object containing earthquake waveforms
    :param filt_kw: dict object containing parameters for an obspy filter
    :param deconvolve: string to select deconvolution method ('time' or 'freq')
    :param moveout: correct time delays of receiver function results for moveout (True or False)
    :param savefile: save result to hdf5 file (string with the full file path to file)
    :param kwargs: additional keyword arguments passed through to rf.RFStream.rf()
    """
    rf_stream = rf.RFStream()
    working_stream = eq_stream.copy()
    for stream3c in tqdm(rf.IterMultipleComponents(working_stream, 'onset', number_components=3)):
        bad_npts = False
        bad_start_t = False
        for i, tr in enumerate(stream3c):
            samp_rate = tr.stats.sampling_rate
            npts = tr.stats.npts
            # Stream should have sampling_rate * stream length (90 s) data points;
            # tolerate a single extra sample.
            thr_npts = samp_rate * 90
            if (npts - thr_npts) == 1:
                thr_npts = thr_npts + 1
            if thr_npts != npts:
                print("Expected npts {0}, actual number {1}... skipping event".format(thr_npts, npts))
                bad_npts = True
            if i == 0:
                ref_t = tr.stats.starttime
            else:
                start_t = tr.stats.starttime
                if start_t != ref_t:
                    print("Inconsistent start times ({0} and {1}) in traces... skipping event".format(ref_t, start_t))
                    bad_start_t = True

        if not bad_npts and not bad_start_t:
            if len(stream3c) != 3:
                continue

            stream3c.trim2(-25, 75, 'onset')
            stream3c.rotate('ZNE->LQT')
            stream3c.rf(deconvolve=deconvolve, filter=filt_kw, **kwargs)
            if moveout:
                stream3c.moveout()
            rf_stream.extend(stream3c)

    if savefile is not None:
        rf_stream.write(savefile, 'H5')

    return rf_stream
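A hypothetical end-to-end sketch chaining Example #2 and Example #6 (station_list, sta_inv and eq_cat must be prepared beforehand from a station inventory and earthquake catalog; the filter settings and output path are illustrative):

eq_stream = get_eq_waveforms(station_list, sta_inv, eq_cat,
                             filt_kws={'type': 'bandpass',
                                       'freqmin': 0.1, 'freqmax': 1.0})
if eq_stream is not None:
    rfs = calculate_rfs(eq_stream, deconvolve='time', moveout=True,
                        savefile='receiver_functions.h5')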
Example #7
def plot_rf_stack(rf_stream,
                  time_window=(-10.0, 25.0),
                  trace_height=0.2,
                  stack_height=0.8,
                  save_file=None,
                  **kwargs):
    """Wrapper function of rf.RFStream.plot_rf() to help do RF plotting with consistent formatting and layout.

    :param rf_stream: RFStream to plot
    :type rf_stream: rf.RFStream
    :param time_window: Time window to plot, defaults to (-10.0, 25.0)
    :type time_window: tuple, optional
    :param trace_height: Height of a single trace (reduce to cram RFs closer together), defaults to 0.2
    :type trace_height: float, optional
    :param stack_height: Height of mean (stacked) RF at top of plot, defaults to 0.8
    :type stack_height: float, optional
    :param save_file: File to save resulting image into, defaults to None
    :type save_file: str to valid file path, optional
    :return: Figure handle to the stack plot
    :rtype: matplotlib.figure.Figure
    """
    # Ensure traces are stackable by ignoring those that don't conform to the predominant data shape
    all_trace_lens = np.array([len(tr) for tr in rf_stream])
    most_common_len, _ = stats.mode(all_trace_lens, axis=None)
    stackable_stream = rf.RFStream(
        [tr for tr in rf_stream if len(tr) == most_common_len])
    num_stackable = len(stackable_stream)
    if num_stackable < len(rf_stream):
        num_removed = len(rf_stream) - num_stackable
        logging.warning(
            'Removed {} traces from RF plot to make it stackable!'.format(
                num_removed))
    # end if

    fig = stackable_stream.plot_rf(fillcolors=('#000000', '#a0a0a0'),
                                   trim=time_window,
                                   trace_height=trace_height,
                                   stack_height=stack_height,
                                   fname=save_file,
                                   show_vlines=True,
                                   **kwargs)
    return fig
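A minimal usage sketch (hypothetical file and station names; rf_util.read_h5_rf is the loader used in the other examples here):

data = rf_util.read_h5_rf('OA_rfs.h5')
station_R = data.select(station='BT23', component='R')
fig = plot_rf_stack(station_R, time_window=(-5.0, 20.0),
                    save_file='rf_stack.png')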
Example #8
    # original file will be interpolated to 100Hz
    o_stream = o_stream.trim2(-5, 60, 'onset')

    station_list = []

    # here we collect station names; station ID may be more appropriate in case the same station name appears in different deployments

    for i in range(len(q_stream)):
        station_list.append(q_stream[i].stats.station)

    station_list = np.unique(np.array(station_list))
    print("Gathered", len(station_list), "stations")

    # here we go with the main loop over stations
    out_file = rf.RFStream()

    for i in range(station_list.shape[0]):
        print("Station", station_list[i], i + 1, "of", station_list.shape[0])
        traces = q_stream.select(station=station_list[i]).copy()

        # we choose short RF to simplify and speed up the processing
        traces = traces.trim2(-5, 20, 'onset')

        # but keep original traces as they are to use them at the end
        o_traces = o_stream.select(station=station_list[i])

        swipe = []
        o_swipe = []

        for trace in traces:
Example #9
def plot_aux_data(soln, config, log, scale):
    """
    Plot auxiliary data such as energy distribution and receiver functions.

    :param soln: Solution container
    :type soln: Customized scipy.optimize.OptimizeResult
    :param config: Solution configuration
    :type config: dict
    :param log: Logging instance
    :type log: logging.Logger
    :param scale: Overall image scaling factor
    :type scale: float
    :return: Matplotlib figure containing the plotted data
    :rtype: matplotlib.figure.Figure
    """
    f = plt.figure(constrained_layout=False,
                   figsize=(6.4 * scale, 6.4 * scale))
    f.suptitle(config["station_id"], y=0.96, fontsize=16)
    gs = f.add_gridspec(2,
                        1,
                        left=0.1,
                        right=0.9,
                        bottom=0.1,
                        top=0.87,
                        hspace=0.3,
                        wspace=0.3,
                        height_ratios=[1, 2])
    gs_top = gs[0].subgridspec(1, 2)
    ax0 = f.add_subplot(gs_top[0, 0])
    ax1 = f.add_subplot(gs_top[0, 1])

    hist_alpha = 0.5
    soln_alpha = 0.3
    axis_font_size = 6 * scale
    title_font_size = 6 * scale
    nbins = 100

    # Plot energy distribution of samples and solution clusters
    energy_hist, bins = np.histogram(soln.sample_funvals, bins=nbins)
    energy_hist = energy_hist.astype(float) / np.max(energy_hist)
    ax0.bar(bins[:-1],
            energy_hist,
            width=np.diff(bins),
            align='edge',
            color='#808080',
            alpha=hist_alpha)

    for i, cluster_energies in enumerate(soln.cluster_funvals):
        color = 'C' + str(i)
        cluster_hist, _ = np.histogram(cluster_energies, bins)
        cluster_hist = cluster_hist.astype(float) / np.max(cluster_hist)
        ax0.bar(bins[:-1],
                cluster_hist,
                width=np.diff(bins),
                align='edge',
                color=color,
                alpha=soln_alpha)
    # end for
    ax0.set_title(
        'Energy distribution of random samples and solution clusters',
        fontsize=title_font_size)
    ax0.set_xlabel('$E_{SU}$ energy (arb. units)')
    ax0.set_ylabel('Normalized counts')
    ax0.tick_params(labelsize=axis_font_size)
    ax0.xaxis.label.set_size(axis_font_size)
    ax0.yaxis.label.set_size(axis_font_size)

    # Plot sorted per-event upwards S-wave energy at top of mantle per solution.
    # Collect event IDs of worst fit traces and present as table of waveform IDs.
    event_ids = config["event_ids"]
    events_best3 = []
    events_worst3 = []
    for i, esu in enumerate(soln.esu):
        assert len(esu) == len(event_ids)
        color = 'C' + str(i)
        esu_sorted = sorted(zip(esu, event_ids))
        events_best3.extend(esu_sorted[:3])
        events_worst3.extend(esu_sorted[-3:])
        esu_sorted = [e[0] for e in esu_sorted]
        ax1.plot(esu_sorted, color=color, alpha=soln_alpha)
    # end for
    events_best3 = sorted(events_best3)
    events_worst3 = sorted(events_worst3, reverse=True)
    best_events_set = set()
    worst_events_set = set()
    for _, evid in events_best3:
        best_events_set.add(evid)
        if len(best_events_set) >= 3:
            break
        # end if
    # end for
    for _, evid in events_worst3:
        worst_events_set.add(evid)
        if len(worst_events_set) >= 3:
            break
        # end if
    # end for
    _tab1 = table(ax1,
                  cellText=[[e] for e in best_events_set],
                  colLabels=['BEST'],
                  cellLoc='left',
                  colWidths=[0.35],
                  loc='upper left',
                  edges='horizontal',
                  fontsize=8,
                  alpha=0.6)  # alpha broken in matplotlib.table!
    _tab2 = table(ax1,
                  cellText=[[e] for e in worst_events_set],
                  colLabels=['WORST'],
                  cellLoc='left',
                  colWidths=[0.35],
                  loc='upper right',
                  edges='horizontal',
                  fontsize=8,
                  alpha=0.6)
    ax1.set_title('Ranked per-event energy for each solution point',
                  fontsize=title_font_size)
    ax1.set_xlabel('Rank (out of # source events)')
    ax1.set_ylabel('Event $E_{SU}$ energy (arb. units)')
    ax1.tick_params(labelsize=axis_font_size)
    ax1.xaxis.label.set_size(axis_font_size)
    ax1.yaxis.label.set_size(axis_font_size)

    # Plot receiver function at base of selected layers
    axis_font_size = 6 * scale
    max_solutions = config["solver"].get("max_solutions", 3)
    for layer in config["layers"]:
        lname = layer["name"]
        if soln.subsurface and lname in soln.subsurface:
            base_seismogms = soln.subsurface[lname]
            # Generate RF and plot.
            gs_bot = gs[1].subgridspec(max_solutions, 1, hspace=0.4)
            for i, seismogm in enumerate(base_seismogms):
                soln_rf = _compute_rf(seismogm, config, log)
                assert isinstance(soln_rf, rf.RFStream)
                # Remove any traces for which deconvolution failed.
                # First, find their unique ID. Then remove all traces with that ID.
                exclude_ids = set(
                    [tr.stats.event_id for tr in soln_rf if len(tr) == 0])
                soln_rf = rf.RFStream([
                    tr for tr in soln_rf
                    if tr.stats.event_id not in exclude_ids
                ])
                axn = f.add_subplot(gs_bot[i])
                if soln_rf:
                    color = 'C' + str(i)
                    rf_R = soln_rf.select(component='R').trim2(
                        RF_TRIM_WINDOW[0], RF_TRIM_WINDOW[1], reftime='onset')
                    num_RFs = len(rf_R)
                    times = rf_R[0].times() + RF_TRIM_WINDOW[0]
                    data = rf_R.stack()[0].data
                    axn.plot(times,
                             data,
                             color=color,
                             alpha=soln_alpha,
                             linewidth=2)
                    axn.text(0.95,
                             0.95,
                             'N = {}'.format(num_RFs),
                             fontsize=10,
                             ha='right',
                             va='top',
                             transform=axn.transAxes)
                    axn.set_xlabel('Time (sec)')
                    axn.grid(color='#80808080', linestyle=':')
                else:
                    axn.annotate('Empty RF plot', (0.5, 0.5),
                                 xycoords='axes fraction',
                                 ha='center')
                # end if
                axn.set_title(' '.join([
                    config["station_id"], lname, 'base RF',
                    '(soln {})'.format(i)
                ]),
                              fontsize=title_font_size,
                              y=0.92,
                              va='top')
                axn.tick_params(labelsize=axis_font_size)
                axn.xaxis.label.set_size(axis_font_size)
                axn.yaxis.label.set_size(axis_font_size)
            # end for
            break  # TODO: Figure out how to add more layers if needed
        # end if
    # end for

    return f
Example #10
def rf_inversion_export(input_h5_file,
                        output_folder,
                        network_code,
                        component='R',
                        resample_freq=6.25,
                        trim_window=(-5.0, 20.0),
                        moveout=True):
    """Export receiver function to text format for ingestion into Fortran RF inversion code.

    :param input_h5_file: Input hdf5 file containing receiver function data
    :type input_h5_file: str or Path
    :param output_folder: Folder in which to export text files, one per channel per station.
        Will be appended with network code.
    :type output_folder: str or Path
    :param network_code: Network to which this RF data belongs, used to disambiguate and track folders.
    :type network_code: str
    :param component: The channel component to export, defaults to 'R'
    :type component: str, optional
    :param resample_freq: Sampling rate (Hz) of the output files, defaults to 6.25 Hz
    :type resample_freq: float, optional
    :param trim_window: Time window to export relative to onset, defaults to (-5.0, 20.0). If data needs
        to be resampled, the samples are anchored to the start of this time window.
    :type trim_window: tuple, optional
    :param moveout: Whether to apply moveout correction prior to exporting, defaults to True
    :type moveout: bool, optional
    """
    # Process for each station:
    # 1. Load hdf5 file containing RFs
    # 2. Filter to desired component.
    # 3. Quality filter to those that meet criteria (Sippl cross-correlation similarity)
    # 4. Moveout and stack the RFs
    # 5. Resample (lanczos) and trim RF
    # 6. Export one file per station in (time, amplitude format)

    output_folder += "_" + network_code
    if not os.path.isdir(output_folder):
        os.makedirs(output_folder, exist_ok=True)
    # end if

    data = rf_util.read_h5_rf(input_h5_file)

    data = data.select(component=component)

    rf_util.label_rf_quality_simple_amplitude('ZRT',
                                              data,
                                              snr_cutoff=2.0,
                                              rms_amp_cutoff=0.2,
                                              max_amp_cutoff=2.0)
    data = rf.RFStream(
        [tr for tr in data if tr.stats.predicted_quality == 'a'])

    data_dict = rf_util.rf_to_dict(data)

    for sta, ch_dict in data_dict:
        for cha, ch_traces in ch_dict.items():
            similar_traces = rf_util.filter_crosscorr_coeff(
                rf.RFStream(ch_traces))
            if not similar_traces:
                continue
            if moveout:
                similar_traces.moveout()
            # end if
            stack = similar_traces.stack()
            trace = stack[0]
            exact_start_time = trace.stats.onset + trim_window[0]
            stack.interpolate(sampling_rate=resample_freq,
                              method='lanczos',
                              a=10,
                              starttime=exact_start_time)
            stack.trim2(*trim_window, reftime='onset')

            times = trace.times() - (trace.stats.onset - trace.stats.starttime)
            # TODO: Remove hardwired scaling factor.
            # This scaling factor only applies to iterative deconvolution with default Gaussian width
            # factor of 2.5. Once we upgrade to rf library version >= 0.9.0, we can remove this hardwired
            # setting and instead have it determined programmatically from rf processing metadata stored
            # in the trace stats structure.
            # The scaling factor originates in the amplitude attenuation effect of the filtering applied
            # in iterative deconv, see table at end of this page:
            # http://eqseis.geosc.psu.edu/~cammon/HTML/RftnDocs/seq01.html
            # The values in this reference table are derived as the integral of the area under the
            # Gaussian in the frequency domain. Analytically, this amounts to simply dividing by scaling
            # factor of a/sqrt(pi), where 'a' here is the Gaussian width used in iterative deconvolution.
            iterdeconv_scaling = 2.5 / np.sqrt(np.pi)
            column_data = np.array([times, trace.data / iterdeconv_scaling]).T
            fname = os.path.join(
                output_folder, "_".join([network_code, sta, cha]) + "_rf.dat")
            np.savetxt(fname, column_data, fmt=('%5.2f', '%.8f'))
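For the default Gaussian width a = 2.5 assumed here, the divisor evaluates to 2.5/sqrt(pi) ≈ 1.4105, i.e. exported amplitudes are scaled by roughly 0.709.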
Example #11
def synthesize_rf_dataset(H,
                          V_p,
                          V_s,
                          inclinations,
                          distances,
                          ds,
                          log=None,
                          include_t3=False,
                          amplitudes=None,
                          baz=0.0):
    """Synthesize RF R-component data set over range of inclinations and distances
    and get result as a rf.RFStream instance.

    :param H: Moho depth (km)
    :type H: float
    :param V_p: P body wave velocity in uppermost layer
    :type V_p: float
    :param V_s: S body wave velocity in uppermost layer
    :type V_s: float
    :param inclinations: Array of inclinations for which to create RFs
    :type inclinations: numpy.array(float)
    :param distances: Array of teleseismic distances corresponding to inclinations
    :type distances: numpy.array(float)
    :param ds: Final sampling rate (Hz) for the downsampled output signal
    :type ds: float
    :param log: Logger to send output to, defaults to None
    :type log: logger, optional
    :param include_t3: If True, include the third expected multiple PpSs+PsPs
    :type include_t3: bool, optional
    :param amplitudes: Custom amplitudes to apply to the multiples
    :type amplitudes: list(float), optional
    :param baz: Back azimuth for metadata
    :type baz: float, optional
    :return: Stream containing synthetic RFs
    :rtype: rf.RFStream
    """
    assert len(inclinations) == len(
        distances), "Must provide 1:1 inclination and distance pairs"

    k = V_p / V_s
    traces = []
    arrivals = None
    for i, inc_deg in enumerate(inclinations):
        theta_p = np.deg2rad(inc_deg)
        p = np.sin(theta_p) / V_p

        t1 = H * (np.sqrt((k * k / V_p / V_p) - p * p) -
                  np.sqrt(1.0 / V_p / V_p - p * p))
        t2 = H * (np.sqrt((k * k / V_p / V_p) - p * p) +
                  np.sqrt(1.0 / V_p / V_p - p * p))
        arrivals = [t1, t2]
        if include_t3:
            t3 = t1 + t2
            arrivals.append(t3)
        if log is not None:
            log.info("Inclination {:3g} arrival times: {}".format(
                inc_deg, arrivals))

        arrivals = [0] + arrivals
        if amplitudes is None:
            amplitudes = [1, 0.5, 0.4]
            if include_t3:
                amplitudes.append(-0.3)
            # end if
        else:
            assert len(amplitudes) == 3 + int(include_t3)
            # t3 amplitude should be negative
            assert (not include_t3) or (amplitudes[3] <= 0)
        # end if
        window = (-5.0, 50.0)  # sec
        fs = 100.0  # Hz
        _, synth_signal = generate_synth_rf(arrivals,
                                            amplitudes,
                                            fs_hz=fs,
                                            window_sec=window)

        now = obspy.UTCDateTime.now()
        # Make sure time difference of events is at least 1 second, since onset time is used as part of
        # logic for identifying related channels in rf.RFStream.
        now += float(i)
        dt = float(window[1] - window[0])
        end = now + dt
        onset = now - window[0]
        header = {
            'network': 'SY',
            'station': 'TST',
            'location': 'GA',
            'channel': 'HHR',
            'sampling_rate': fs,
            'starttime': now,
            'endtime': end,
            'onset': onset,
            'station_latitude': -19.0,
            'station_longitude':
            137.0,  # arbitrary (approx location of OA deployment)
            'slowness': p * KM_PER_DEG,
            'inclination': inc_deg,
            'back_azimuth': baz,
            'distance': float(distances[i])
        }
        tr = rf.rfstream.RFTrace(data=synth_signal.copy(), header=header)
        tr = tr.decimate(int(np.round(fs / ds)), no_filter=True)
        traces.append(tr)
    # end for

    stream = rf.RFStream(traces)

    return stream, arrivals
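Since k = V_p/V_s in the code above, the t1 and t2 expressions are the standard single-layer Ps and PpPs delay times, and t3 = t1 + t2 corresponds to PpSs+PsPs:

$t_{Ps} = H\left(\sqrt{1/V_s^2 - p^2} - \sqrt{1/V_p^2 - p^2}\right)$
$t_{PpPs} = H\left(\sqrt{1/V_s^2 - p^2} + \sqrt{1/V_p^2 - p^2}\right)$
$t_{PpSs+PsPs} = 2H\sqrt{1/V_s^2 - p^2} = t_{Ps} + t_{PpPs}$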
Example #12
def main():
    """Main entry function for RF picking tool.
    """
    infile = filedialog.askopenfilename(initialdir=".",
                                        title="Select RF file",
                                        filetypes=(("h5 files", "*.h5"), ))
    output_folder = filedialog.askdirectory(
        initialdir=os.path.split(infile)[0], title='Select output folder')
    if not os.path.isdir(output_folder):
        log.info("Creating output folder {}".format(output_folder))
        os.makedirs(output_folder, exist_ok=True)
    # end if
    log.info("Output files will be emitted to {}".format(output_folder))

    log.info("Loading %s", infile)
    data_all = rf_util.read_h5_rf(infile)
    data_dict = rf_util.rf_to_dict(data_all)

    stations = sorted(list(data_dict.keys()))

    # Assuming same rotation type for all RFs. This is consistent with the existing workflow.
    rf_type = data_all[0].stats.rotation

    for st in stations:
        station_db = data_dict[st]

        # Choose RF channel
        channel = rf_util.choose_rf_source_channel(rf_type, station_db)
        channel_data = station_db[channel]
        # Check assumption
        for tr in channel_data:
            assert tr.stats.rotation == rf_type, 'Mismatching RF rotation type'

        # Label and filter quality
        rf_util.label_rf_quality_simple_amplitude(rf_type, channel_data)
        rf_stream = rf.RFStream([
            tr for tr in channel_data if tr.stats.predicted_quality == 'a'
        ]).sort(['back_azimuth'])
        if not rf_stream:
            log.info("No data survived filtering for %s, skipping", st)
            continue

        # Plot RF stack of primary component
        fig = rf_plot_utils.plot_rf_stack(rf_stream)
        fig.set_size_inches(8, 9)
        fig.suptitle("Channel {}".format(rf_stream[0].stats.channel))
        ax0 = fig.axes[0]
        # Make sure we draw once first before capturing blit background
        fig.canvas.draw()
        # Disallow resizing to avoid having to handle blitting with resized window.
        win = fig.canvas.window()
        win.setFixedSize(win.size())
        blit_background = fig.canvas.copy_from_bbox(ax0.bbox)

        mask = np.array([False] * len(rf_stream))
        rect_select = RectangleSelector(ax0,
                                        lambda e0, e1: on_select(e0, e1, mask),
                                        useblit=True,
                                        rectprops=dict(fill=False,
                                                       edgecolor='red'))
        cid = fig.canvas.mpl_connect(
            'button_release_event',
            lambda e: on_release(e, ax0, mask, blit_background, rect_select))
        plt.show()

        fig.canvas.mpl_disconnect(cid)
        rect_select = None

        selected_event_ids = [
            tr.stats.event_id for i, tr in enumerate(rf_stream) if mask[i]
        ]
        log.info("{} streams selected".format(len(selected_event_ids)))
        log.info("Selected event ids:")
        log.info(selected_event_ids)

        network = rf_stream[0].stats.network
        outfile = os.path.join(
            output_folder,
            '.'.join([network, st, channel]) + '_event_mask.txt')
        log.info("Writing mask to file {}".format(outfile))
        if os.path.exists(outfile):
            log.warning("Overwriting existing file {} !".format(outfile))
        with open(outfile, 'w') as f:
            f.write('\n'.join(selected_event_ids))
Example #13
def compute_rf_quality_metrics(station_id, station_stream3c, similarity_eps):
    """Top level function for adding quality metrics to trace metadata.

    :param station_id: Station ID
    :type station_id: str
    :param station_stream3c: 3-channel stream
    :type station_stream3c: list(rf.RFStream) with 3 components
    :param similarity_eps: Distance threshold used for DBSCAN clustering
    :type similarity_eps: float
    :return: Triplet of RF streams with Z, R or Q, and T components with populated
        quality metrics. Otherwise return None in case of failure.
    """

    logger = logging.getLogger(__name__)

    # Filter out traces with NaNs - this simplifies downstream code, which then doesn't have to worry about NaNs.
    # We use the fact that traces are bundled into 3-channel triplets here to discard all or none of the related
    # channels for an event.
    nonan_streams = []
    for stream in station_stream3c:
        skip_stream = False
        for tr in stream:
            if tr.stats.type == 'rf' and np.any(np.isnan(tr.data)):
                logger.warning(
                    "NaN data found in station {} trace\n{}\n, skipping!".
                    format(station_id, tr))
                skip_stream = True
                break
        # end for
        if skip_stream:
            continue
        nonan_streams.append(stream)
    # end for
    if len(nonan_streams) < len(station_stream3c):
        num_supplied = len(station_stream3c)
        num_discarded = num_supplied - len(nonan_streams)
        logger.info(
            "Discarded {}/{} events from station {} due to NaNs in at least one channel"
            .format(num_discarded, num_supplied, station_id))
    # end if

    # Early exit if nothing left
    if not nonan_streams:
        logger.warning(
            "nonan_streams empty after filtering out nan traces! {}. Skipping station {}"
            .format(nonan_streams, station_id))
        return None
    # end if

    # Flatten the traces into a single RFStream for subsequent processing
    rf_streams = rf.RFStream([
        tr for stream in nonan_streams for tr in stream
        if tr.stats.type == 'rf'
    ])

    # Subsequent functions process the data in bulk square matrices, so it is essential all traces are the same length.
    # If not, processing will fail due to incompatible data structure. So here we filter out traces that do not have
    # the expected length. Expected length is assumed to be the most common length amongst all the traces.
    num_traces_before = len(rf_streams)
    all_trace_lens = np.array([len(tr) for tr in rf_streams])
    expected_len, _ = stats.mode(all_trace_lens, axis=None)
    expected_len = expected_len[0]
    if expected_len <= 1:
        logger.warning(
            "Cannot compute quality metrics on trace length {} <= 1! Skipping station {}"
            .format(expected_len, station_id))
        return None
    # end if
    keep_traces = []
    for tr in rf_streams:
        if len(tr) != expected_len:
            logger.error(
                "Trace {} of station {} has inconsistent sample length {} (expected {}), discarding!"
                .format(tr, station_id, len(tr), expected_len))
        else:
            keep_traces.append(tr)
        # end if
    # end for

    streams = rf.RFStream(keep_traces)
    num_traces_after = len(streams)
    if num_traces_after < num_traces_before:
        num_discarded = num_traces_before - num_traces_after
        logger.warning(
            "Discarded {}/{} traces due to inconsistent trace length".format(
                num_discarded, num_traces_before))
    # end if

    # Extract RF type, the primary polarized component and transverse component (ignore source stream)
    rf_type, p_stream, t_stream, z_stream = get_rf_stream_components(streams)
    if rf_type is None:
        logger.error(
            "Unrecognized RF type for station {}. File might not be RF file!".
            format(station_id))
        return None
    # end if

    # Note that we only compute quality metrics on the p_stream. The filtering of t_stream traces should match
    # the filtering of p_stream traces, so t_stream does not need independent metrics.

    # Compute S/N ratios for primary component RFs
    rf_util.compute_rf_snr(p_stream)

    # Compute spectral entropy for primary component RFs
    sp_entropy = spectral_entropy(p_stream)
    for i, tr in enumerate(p_stream):
        md_dict = {'entropy': sp_entropy[i]}
        tr.stats.update(md_dict)
    # end for

    # Compute log10 of amplitude metrics, as these are more useful than straight amplitudes for quality classifier
    for tr in p_stream:
        tr.stats['log10_amp_max'] = np.log10(tr.stats['amp_max'])
        tr.stats['log10_amp_rms'] = np.log10(tr.stats['amp_rms'])
        tr.stats['log10_z_amp_max'] = np.log10(tr.stats['z_amp_max'])
        tr.stats['log10_z_amp_rms'] = np.log10(tr.stats['z_amp_rms'])
    # end for

    # Define time windows relative to onset for computing statistical ratios
    EVENT_SIGNAL_WINDOW = (-5.0, 25.0)
    NOISE_SIGNAL_WINDOW = (None, -5.0)
    event_signal = p_stream.copy().slice2(*EVENT_SIGNAL_WINDOW,
                                          reftime='onset').taper(
                                              0.5, max_length=1.0)
    noise_signal = p_stream.copy().slice2(*NOISE_SIGNAL_WINDOW,
                                          reftime='onset').taper(
                                              0.5, max_length=1.0)
    rf_util.compute_extra_rf_stats(event_signal)
    rf_util.compute_extra_rf_stats(noise_signal)
    for _i, _tr in enumerate(p_stream):
        _tr.stats['delta_mean_log10_cplx_amp'] = (
            event_signal[_i].stats.mean_log10_cplx_amp -
            noise_signal[_i].stats.mean_log10_cplx_amp)
        _tr.stats['delta_log10_amp_20pc'] = (
            event_signal[_i].stats.log10_amp_20pc -
            noise_signal[_i].stats.log10_amp_20pc)
        _tr.stats['delta_log10_amp_80pc'] = (
            event_signal[_i].stats.log10_amp_80pc -
            noise_signal[_i].stats.log10_amp_80pc)
        _tr.stats['delta_log10_rms_amp'] = event_signal[
            _i].stats.log10_rms_amp - noise_signal[_i].stats.log10_rms_amp
    # end for

    # Compute ratios of spectral histogram statistics
    noise_data = np.array([tr.data for tr in noise_signal])
    event_data = np.array([tr.data for tr in event_signal])
    noise_bins, noise_power = signal.welch(noise_data, detrend='linear')
    event_bins, event_power = signal.welch(event_data, detrend='linear')
    # Compute moments of the frequency distribution. Only use lower frequency bands up to 1/4 Nyquist.
    noise_bins = noise_bins[0:32]
    noise_power = noise_power[:, 0:32]
    event_bins = event_bins[0:32]
    event_power = event_power[:, 0:32]
    noise_m0 = np.sum(noise_power, axis=1)
    event_m0 = np.sum(event_power, axis=1)
    spectral_m0_ratio = np.log10(event_m0 / noise_m0)
    noise_m1 = np.sum(noise_power * noise_bins, axis=1)
    event_m1 = np.sum(event_power * event_bins, axis=1)
    spectral_m1_ratio = np.log10(event_m1 / noise_m1)
    noise_m2 = np.sum(noise_power * noise_bins**2, axis=1)
    event_m2 = np.sum(event_power * event_bins**2, axis=1)
    spectral_m2_ratio = np.log10(event_m2 / noise_m2)
    for i, tr in enumerate(p_stream):
        md_dict = {
            'm0_delta': event_m0[i] - noise_m0[i],
            'm1_delta': event_m1[i] - noise_m1[i],
            'm2_delta': event_m2[i] - noise_m2[i],
            'm0_ratio': spectral_m0_ratio[i],
            'm1_ratio': spectral_m1_ratio[i],
            'm2_ratio': spectral_m2_ratio[i]
        }
        tr.stats.update(md_dict)
    # end for

    # Compute coherence metric within targeted normalized frequency band.
    # Note that settings here are relative to the sampling rate. If the sampling
    # rate changes and you want the same absolute frequency range to be used for
    # coherence, then these settings need to be updated.
    fn_low = 0.15
    fn_high = 0.3
    max_coherence = compute_max_coherence(p_stream, fn_low, fn_high)
    for i, tr in enumerate(p_stream):
        md_dict = {'max_coherence': max_coherence[i]}
        tr.stats.update(md_dict)
    # end for

    # TODO: Compute phase weighting vector per station per 2D (back_azimuth, distance) bin

    # Perform clustering for all traces in a station, and assign group IDs.
    # This will be super expensive when there are a lot of events, as the distance calculation grows as N^2.
    clustering_stream = p_stream.copy()
    clustering_stream = clustering_stream.trim2(-5.0, 25.0, 'onset')
    swipe = np.array([tr.data for tr in clustering_stream])
    if swipe.shape[0] > 1:
        ind = rf_group_by_similarity(swipe, similarity_eps)
    else:
        ind = np.array([0])
    # end if
    num_groups = np.amax(ind)
    logger.info("Station {}: detected {} clusters".format(
        station_id, num_groups))
    # Apply group
    for i, tr in enumerate(p_stream):
        md_dict = {'rf_group': ind[i] if ind[i] >= 0 else None}
        tr.stats.update(md_dict)
    # end for

    # TODO: Research techniques for grouping waveforms using singular value decomposition (SVD), possibly of
    # the complex waveform (from Hilbert transform) to determine the primary phase and amplitude components.
    # High similarity to the strongest eigenvectors might indicate waves in the primary group (group 0 in DBSCAN)
    # without the N^2 computational cost of DBSCAN.

    return (z_stream, p_stream, t_stream)
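A self-contained sketch of the spectral moment computation above (toy data; the [:32] slice keeps the lowest quarter of scipy welch's default 129 frequency bins, i.e. up to 1/4 Nyquist):

import numpy as np
from scipy import signal

data = np.random.randn(3, 1024)                  # three toy "traces"
bins, power = signal.welch(data, detrend='linear')
bins, power = bins[:32], power[:, :32]           # lowest quarter of the band
m0 = np.sum(power, axis=1)                       # zeroth moment: total power
m1 = np.sum(power * bins, axis=1)                # first moment
m2 = np.sum(power * bins ** 2, axis=1)           # second moment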
Example #14
def main():
    ''' @package extract_rf
    This code contains different approaches to extract RFs from H5 file in stacked form.
    Output is prepared for trans-dimensional inversion in ASCII format

    Currently there are two methods of stacking
    1. rf stacked by similarity
    2. all rf stacked

    Note the parameters of the gaussian pulse and its width, where:

    Value of "a" | Frequency (Hz) at which G(f) = 0.1 | Approximate pulse width (s)
    ------------ | ---------------------------------- | ---------------------------
    10           | 4.8                                | 0.50
    5            | 2.4                                | 0.75
    2.5          | 1.2                                | 1.00
    1.25         | 0.6                                | 1.50
    1.0          | 0.5                                | 1.67 (5/3)
    0.625        | 0.3                                | 2.10
    0.5          | 0.24                               | 2.36
    0.4          | 0.2                                | 2.64
    0.2          | 0.1                                | 3.73

    '''
    print("Reading the input file...")
    # Input file
    stream = rf.read_rf('/g/data/ha3/am7399/shared/OA-ZRT-R-cleaned.h5', 'H5')
    print("Reading is done...")

    net = stream[0].stats.network
    # output directory
    out_dir = net + "-INV/"

    # inversion programs use a 1 Hz pulse width, therefore the upper corner should not be lower than that
    filter_type = 'bandpass'
    freqmin = 0.1
    freqmax = 1.0

    # Trimming window
    tstart = -5.
    tend = 40.

    station_list = []
    group_list = []
    # here we collect station names

    for i in range(len(stream)):
        station_list.append(stream[i].stats.station)
        group_list.append(stream[i].stats.rf_group)

    group_list = np.array(group_list)
    station_list = np.array(station_list)

    # we need to find the largest number of groups for each unique station
    gidx = np.argsort(-group_list)
    group_list = group_list[gidx]
    station_list = station_list[gidx]

    # unique will return the first occurrence of the station sorted in descending order of group number
    station_list, idx = np.unique(station_list, return_index=True)
    group_list = group_list[idx]

    print("Gathered ", len(station_list), " stations")
    for i in range(station_list.shape[0]):
        print(station_list[i], group_list[i])

    sstat = []

    estat = input("Station to extract [All]: ")
    if station_list[estat == station_list].shape[0] == 0:
        sstat = station_list
        plot = False
    else:
        sstat.append(estat)
        plot = True

    for estat in sstat:

        station = stream.select(station=estat, component='R').moveout()

        # we use a zero-phase-shift band-pass filter with 2 corners. This is done in two passes, forward and backward, so we effectively end up with 4 corners.
        # print(station[0].stats.delta,station[0].stats.npts)

        if len(station) > 1:

            for trace in station:
                # keep the original amplitude so it can be rescaled later, preserving proportions relative to the source
                if trace.stats.amax > 0:
                    amp_max = trace.stats.amax
                    print("*")
                else:
                    amp_max = np.max(trace.data)
                trace.taper(0.01)
                # 6.25 is the frequency hardwired into the inversion program
                trace = trace.filter(filter_type,
                                     freqmin=freqmin,
                                     freqmax=freqmax,
                                     corners=2,
                                     zerophase=True).interpolate(6.25)
                #   trace=trace.interpolate(6.25)
                trace.data = trace.data * (amp_max / np.amax(trace.data))
            # end for

            # first we get stacks - normal and phase weighted
            copy_st = station.copy()
            stacked = station.copy().stack()
            stacked.trim2(tstart, tend, 'onset')
            time_s = stacked[0].stats.delta * np.arange(
                stacked[0].stats.npts) + tstart

            amp_max = np.max(stacked[0].data)

            phase_w = phase_weights(station)
            ph_weighted = copy_st.stack()
            ph_weighted[0].data = ph_weighted[0].data * phase_w
            # Note - weighting changes the real amplitude and it must be rescaled back to origin
            ph_weighted.trim2(tstart, tend, 'onset')
            time_p = ph_weighted[0].stats.delta * np.arange(
                ph_weighted[0].stats.npts) + tstart
            zero = ph_weighted[0].data[time_p < 0.]
            idx = np.max(np.where(zero <= 0.)[0])
            ph_weighted[0].data[:idx + 1] = 0.
            #   ph_weighted.filter(filter_type, freqmin=freqmin, freqmax=freqmax,corners=1,zerophase=True)
            ph_weighted[0].data = ph_weighted[0].data * (
                amp_max / np.max(ph_weighted[0].data))

            # then we take the same for each similarity groups

            groups = find_rf_group_ids(station)
            max_grp = np.max(groups)
            print("Max grp ", max_grp)

            # however first we define general plotting scheme and plot previous results
            fig = plt.figure(figsize=(11.69, 8.27))
            columns = 2
            rows = int(np.ceil(float(max_grp) / float(columns))) + 1
            grid = gridspec.GridSpec(columns, rows, wspace=0.2, hspace=0.2)
            ax = plt.subplot(grid[0])
            ax.plot(time_s, stacked[0].data)
            ax.set_title(estat + ' Stacked')
            ax = plt.subplot(grid[1])
            ax.plot(time_p, ph_weighted[0].data)
            ax.set_title('Phase weighted stack')

            frame = 2
            for i in range(max_grp):

                grp_stream = rf.RFStream()

                for trace in station:
                    if trace.stats.rf_group == i:
                        grp_stream.append(trace)
                print("Group: ", i, " number of records: ", len(grp_stream))
                grp_stacked = grp_stream.copy().stack()
                grp_stck_max = np.max(np.abs(grp_stacked.copy()[0].data))
                # grp_stck_max=amp_max
                phase_w = phase_weights(grp_stream)
                grp_stacked_wght = grp_stacked.copy()[0].data * phase_w
                grp_stacked_wght = grp_stacked_wght * (
                    grp_stck_max / np.max(np.abs(grp_stacked_wght)))

                grp_time = grp_stacked[0].stats.delta * np.arange(
                    grp_stacked[0].stats.npts) + tstart
                ax = plt.subplot(grid[i + frame])
                ax.plot(grp_time, grp_stacked_wght)
                ax.set_title('Group ' + str(i))
            # end for

            if not os.path.exists(out_dir):
                os.makedirs(out_dir)
                os.makedirs(out_dir + 'PDF')
            # end if

            if plot:
                plt.show()
            else:
                fig.savefig(out_dir + 'PDF/' + net + '-' + estat +
                            '-rf2-ph_weighted.pdf',
                            format='PDF')
                plt.close('all')
            # end if

            with open(out_dir + net + '-' + estat + '-rf2-ph_weighted.dat',
                      'w') as text_file:
                for i in range(time_p.shape[0]):
                    text_file.write(
                        str(time_p[i]) + '   ' + str(ph_weighted[0].data[i]) +
                        '\n')
Example #16
def main(input_file,
         output_file,
         event_mask_folder='',
         apply_amplitude_filter=False,
         apply_similarity_filter=False,
         hk_weights=DEFAULT_HK_WEIGHTS,
         hk_solution_labels=DEFAULT_HK_SOLN_LABEL,
         hk_hpf_freq=None,
         hk_vp=DEFAULT_Vp,
         save_hk_solution=False):
    # docstring redundant since CLI options are already documented.

    log.setLevel(logging.INFO)

    # Read source file
    log.info("Loading input file {}".format(input_file))
    data_all = rf_util.read_h5_rf(input_file)

    # Convert to hierarchical dictionary format
    data_dict = rf_util.rf_to_dict(data_all)

    event_mask_dict = None
    if event_mask_folder and os.path.isdir(event_mask_folder):
        log.info(
            "Applying event mask from folder {}".format(event_mask_folder))
        mask_files = os.listdir(event_mask_folder)
        mask_files = [
            f for f in mask_files
            if os.path.isfile(os.path.join(event_mask_folder, f))
        ]
        pattern = r"([A-Za-z0-9\.]{5,})_event_mask\.txt"
        pattern = re.compile(pattern)
        event_mask_dict = dict()
        for f in mask_files:
            match_result = pattern.match(f)
            if not match_result:
                continue
            code = match_result[1]
            with open(os.path.join(event_mask_folder, f), 'r') as _f:
                events = _f.readlines()
                events = set([e.strip() for e in events])
                event_mask_dict[code] = events
            # end with
        # end for
    # end if

    if event_mask_dict:
        log.info("Loaded {} event masks".format(len(event_mask_dict)))
    # end if

    # Plot all data to PDF file
    fixed_stack_height_inches = 0.8
    y_pad_inches = 1.6
    total_trace_height_inches = paper_size_A4[
        1] - fixed_stack_height_inches - y_pad_inches
    max_trace_height = 0.2

    log.setLevel(logging.WARNING)

    with PdfPages(output_file) as pdf:
        # Would like to use Tex, but lack desktop PC privileges to update packages to what is required
        plt.rc('text', usetex=False)
        pbar = tqdm.tqdm(total=len(data_dict))
        network = data_dict.network
        rf_type = data_dict.rotation
        hk_soln = dict()
        station_coords = dict()
        for st in sorted(data_dict.keys()):
            station_db = data_dict[st]

            pbar.update()
            pbar.set_description("{}.{}".format(network, st))

            # Choose RF channel
            channel = rf_util.choose_rf_source_channel(rf_type, station_db)
            channel_data = station_db[channel]
            if not channel_data:
                continue
            # end if
            full_code = '.'.join([network, st, channel])

            t_channel = list(channel)
            t_channel[-1] = 'T'
            t_channel = ''.join(t_channel)

            rf_stream = rf.RFStream(channel_data).sort(['back_azimuth'])
            if event_mask_dict and full_code in event_mask_dict:
                # Select events from external source
                event_mask = event_mask_dict[full_code]
                rf_stream = rf.RFStream([
                    tr for tr in rf_stream if tr.stats.event_id in event_mask
                ]).sort(['back_azimuth'])
            # end if
            if apply_amplitude_filter:
                # Label and filter quality
                rf_util.label_rf_quality_simple_amplitude(rf_type, rf_stream)
                rf_stream = rf.RFStream([
                    tr for tr in rf_stream if tr.stats.predicted_quality == 'a'
                ]).sort(['back_azimuth'])
            # end if
            if not rf_stream:
                continue
            if apply_similarity_filter and len(rf_stream) >= 3:
                rf_stream = rf_util.filter_crosscorr_coeff(rf_stream)
            # end if
            if not rf_stream:
                continue

            # Find matching T-component data
            events = [tr.stats.event_id for tr in rf_stream]
            transverse_data = station_db[t_channel]
            t_stream = rf.RFStream([
                tr for tr in transverse_data if tr.stats.event_id in events
            ]).sort(['back_azimuth'])

            # Plot pinwheel of primary and transverse components
            fig = rf_plot_utils.plot_rf_wheel([rf_stream, t_stream],
                                              fontscaling=0.8)
            fig.set_size_inches(*paper_size_A4)
            plt.tight_layout()
            plt.subplots_adjust(hspace=0.15, top=0.95, bottom=0.15)
            ax = fig.gca()
            fig.text(-0.32,
                     -0.32,
                     "\n".join(rf_stream[0].stats.processing),
                     fontsize=6,
                     transform=ax.transAxes)
            pdf.savefig(dpi=300, papertype='a4', orientation='portrait')
            plt.close()

            num_traces = len(rf_stream)
            assert len(t_stream) == num_traces or not t_stream

            # Plot RF stack of primary component
            trace_ht = min(total_trace_height_inches / num_traces,
                           max_trace_height)
            fig = rf_plot_utils.plot_rf_stack(
                rf_stream,
                trace_height=trace_ht,
                stack_height=fixed_stack_height_inches,
                fig_width=paper_size_A4[0])
            fig.suptitle("Channel {}".format(rf_stream[0].stats.channel))
            # Customize layout to pack to top of page while preserving RF plots aspect ratios
            _rf_layout_A4(fig)
            # Save to new page in file
            pdf.savefig(dpi=300, papertype='a4', orientation='portrait')
            plt.close()

            # Plot RF stack of transverse component
            if t_stream:
                fig = rf_plot_utils.plot_rf_stack(
                    t_stream,
                    trace_height=trace_ht,
                    stack_height=fixed_stack_height_inches,
                    fig_width=paper_size_A4[0])
                fig.suptitle("Channel {}".format(t_stream[0].stats.channel))
                # Customize layout to pack to top of page while preserving RF plots aspect ratios
                _rf_layout_A4(fig)
                # Save to new page in file
                pdf.savefig(dpi=300, papertype='a4', orientation='portrait')
                plt.close()
            # end if

            # Plot H-k stack using primary RF component
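            # (H-k stacking grid-searches crustal thickness H and Vp/Vs ratio k over stacked phase arrivals,
            # in the manner of Zhu & Kanamori, 2000.)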
            fig, maxima = _produce_hk_stacking(rf_stream,
                                               weighting=hk_weights,
                                               labelling=hk_solution_labels,
                                               V_p=hk_vp)
            if save_hk_solution and hk_hpf_freq is None:
                hk_soln[st] = maxima
                station_coords[st] = (channel_data[0].stats.station_latitude,
                                      channel_data[0].stats.station_longitude)
            # end if
            paper_landscape = (paper_size_A4[1], paper_size_A4[0])
            fig.set_size_inches(*paper_landscape)
            # plt.tight_layout()
            # plt.subplots_adjust(hspace=0.15, top=0.95, bottom=0.15)
            pdf.savefig(dpi=300, papertype='a4', orientation='landscape')
            plt.close()

            if hk_hpf_freq is not None:
                # Repeat H-k stack with high pass filtering
                fig, maxima = _produce_hk_stacking(
                    rf_stream,
                    weighting=hk_weights,
                    labelling=hk_solution_labels,
                    V_p=hk_vp,
                    filter_options={
                        'type': 'highpass',
                        'freq': hk_hpf_freq,
                        'corners': 1,
                        'zerophase': True
                    })
                if save_hk_solution:
                    hk_soln[st] = maxima
                    station_coords[st] = (
                        channel_data[0].stats.station_latitude,
                        channel_data[0].stats.station_longitude)
                # end if
                fig.set_size_inches(*paper_landscape)
                pdf.savefig(dpi=300, papertype='a4', orientation='landscape')
                plt.close()
            # end if

        # end for
        pbar.close()
    # end with

    # Save H-k solutions to CSV file
    if hk_soln:
        assert len(hk_soln) == len(station_coords)
        # Sort H-k solutions by depth from low to high
        update_dict = {}
        for st, hks in hk_soln.items():
            sorted_hks = sorted([tuple(hk) for hk in hks])
            update_dict[st] = np.array(
                list(station_coords[st]) +
                [i for hk in sorted_hks for i in hk])
        # end for
        hk_soln.update(update_dict)

        df = pd.DataFrame.from_dict(hk_soln, orient='index')
        colnames = [('H{}'.format(i), 'k{}'.format(i))
                    for i in range((len(df.columns) - 2) // 2)]
        colnames = ['Latitude', 'Longitude'] + list(
            itertools.chain.from_iterable(colnames))
        df.columns = colnames
        csv_fname, _ = os.path.splitext(output_file)
        csv_fname += '.csv'
        df.index.name = 'Station'
        df.to_csv(csv_fname)
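
For reference, a minimal sketch of reading back the H-k solution CSV written above (the file name is hypothetical); columns are Latitude, Longitude, then (H, k) solution pairs sorted from shallow to deep:

import pandas as pd

hk_df = pd.read_csv('OA_rf_report.csv', index_col='Station')
print(hk_df[['Latitude', 'Longitude', 'H0', 'k0']].head())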
Example No. 17
def run_batch(transect_file,
              rf_waveform_file,
              fed_db_file,
              amplitude_filter=False,
              similarity_filter=False,
              stack_scale=0.4,
              width=30.0,
              spacing=2.0,
              max_depth=200.0,
              channel='R',
              output_folder='',
              colormap='seismic',
              annotators=None):
    """Run CCP generation in batch mode along a series of transects.

    :param transect_file: File specifying the network and the start/end station pairs of each transect
    :type transect_file: str or Path
    :param rf_waveform_file: HDF5 file of QA'd receiver functions for the network matching the transect file
    :type rf_waveform_file: str or Path
    :param fed_db_file: Name of file with which to initialize FederatedASDFDataSet
    :type fed_db_file: str or Path
    :param amplitude_filter: Whether to use amplitude-based filtering of waveforms before plotting.
    :type amplitude_filter: bool
    :param similarity_filter: Whether to use RF waveform similarity filtering of waveforms before plotting.
    :type similarity_filter: bool
    :param stack_scale: Max value to represent on color scale of CCP plot
    :type stack_scale: float
    :param width: Width of transect (km)
    :type width: float
    :param spacing: Discretization size (km) for RF ray sampling
    :type spacing: float
    :param max_depth: Maximum depth of slice below the transect line (km)
    :type max_depth: float
    :param channel: Channel component ID to source for the RF amplitude
    :type channel: str of length 1
    :return: None
    """

    print("Reading HDF5 file...")
    rf_stream = rf.read_rf(rf_waveform_file, 'H5').select(component=channel)

    rf_type = rf_stream[0].stats.rotation
    if amplitude_filter:
        # Label and filter quality
        rf_util.label_rf_quality_simple_amplitude(rf_type, rf_stream)
        rf_stream = rf.RFStream(
            [tr for tr in rf_stream if tr.stats.predicted_quality == 'a'])
    # end if

    # Similarity filtering must be applied one station at a time.
    if similarity_filter:
        data_dict = rf_util.rf_to_dict(rf_stream)
        rf_stream = rf.RFStream()
        for _sta, ch_dict in data_dict:
            for _cha, ch_traces in ch_dict.items():
                if len(ch_traces) >= 3:
                    # Use a short time window that cuts off at 10 sec, since we're only interested in the Ps phase here.
                    filtered_traces = rf_util.filter_crosscorr_coeff(
                        rf.RFStream(ch_traces),
                        time_window=(-2, 10),
                        apply_moveout=True)
                    rf_stream += filtered_traces
                else:
                    rf_stream += rf.RFStream(ch_traces)
                # end if
            # end for
        # end for
    # end if

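    # Highpass filter settings applied to every RF trace before CCP stacking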
    spectral_filter = {
        'type': 'highpass',
        'freq': 0.2,
        'corners': 1,
        'zerophase': True
    }
    if spectral_filter is not None:
        rf_stream.filter(**spectral_filter)
    # end if

    db = FederatedASDFDataSet.FederatedASDFDataSet(fed_db_file)
    sta_coords = db.unique_coordinates

    if output_folder and not os.path.isdir(output_folder):
        assert not os.path.isfile(output_folder)
        os.makedirs(output_folder, exist_ok=True)
    # end if

    with open(transect_file, 'r') as f:
        net = f.readline().strip()
        for transect in f.readlines():

            if not transect.strip():
                continue

            sta_start, sta_end = transect.split(',')
            sta_start = sta_start.strip()
            sta_end = sta_end.strip()
            start = '.'.join([net, sta_start])
            end = '.'.join([net, sta_end])
            start = np.array(sta_coords[start])
            end = np.array(sta_coords[end])

            # Offset ends slightly to make sure we don't lose end stations due to truncation error.
            # Note: for simplicity this treats lat/lon like cartesian coords, but this is approximate
            # and will break down near poles, for long transects, or if transect crosses the antimeridian.
            dirn = (end - start)
            dirn = dirn / np.linalg.norm(dirn)
            start -= LEAD_INOUT_DIST_KM * dirn / KM_PER_DEG
            end += LEAD_INOUT_DIST_KM * dirn / KM_PER_DEG
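            # sta_coords stores (lon, lat), so swap components to (lat, lon) for run()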
            start_latlon = (start[1], start[0])
            end_latlon = (end[1], end[0])

            title = 'Network {} CCP R-stacking (profile {}-{})'.format(
                net, sta_start, sta_end)
            hf_main, hf_map, metadata = run(rf_stream,
                                            start_latlon,
                                            end_latlon,
                                            width,
                                            spacing,
                                            max_depth,
                                            channel,
                                            stacked_scale=stack_scale,
                                            title=title,
                                            colormap=colormap,
                                            background_model='ak135_60')

            metadata['transect_start'] = start
            metadata['transect_end'] = end
            metadata['transect_dirn'] = dirn
            if annotators is not None:
                for ant in annotators:
                    ant(hf_main, metadata)
                # end for
            # end if

            outfile_base = '{}-ZRT-R_CCP_stack_{}-{}_{}km_spacing'.format(
                net, sta_start, sta_end, spacing)
            outfile = outfile_base + '.pdf'
            outfile_map = outfile_base + '_MAP.pdf'

            outfile = os.path.join(output_folder, outfile)
            outfile_map = os.path.join(output_folder, outfile_map)

            if hf_main is not None:
                hf_main.savefig(outfile, dpi=300)
                plt.close(hf_main)
            # end if

            if hf_map is not None:
                hf_map.savefig(outfile_map, dpi=300)
                plt.close(hf_map)
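
A minimal sketch of the transect file format implied by the parsing in run_batch (network code on the first line, then one comma-separated start/end station pair per line), plus a corresponding invocation; all file and station names are hypothetical:

# transects_OA.txt:
#   OA
#   BS24,CF28
#   BT23,CE26
run_batch('transects_OA.txt', 'OA_rf_qc.h5', 'asdf_files.txt',
          amplitude_filter=True, similarity_filter=True,
          output_folder='ccp_OA')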
Example No. 18
def main(input_file,
         output_file,
         event_mask_folder='',
         apply_amplitude_filter=False,
         apply_similarity_filter=False,
         hk_weights=DEFAULT_HK_WEIGHTS):

    # Read source file
    data_all = rf_util.read_h5_rf(input_file)

    # Convert to hierarchical dictionary format
    data_dict = rf_util.rf_to_dict(data_all)

    event_mask_dict = None
    if event_mask_folder and os.path.isdir(event_mask_folder):
        mask_files = os.listdir(event_mask_folder)
        mask_files = [
            f for f in mask_files
            if os.path.isfile(os.path.join(event_mask_folder, f))
        ]
        # print(mask_files)
        pattern = r"([A-Za-z0-9\.]{5,})_event_mask\.txt"
        pattern = re.compile(pattern)
        event_mask_dict = dict()
        for f in mask_files:
            match_result = pattern.match(f)
            if not match_result:
                continue
            code = match_result[1]
            # print(code)
            with open(os.path.join(event_mask_folder, f), 'r') as _f:
                events = _f.readlines()
                events = set([e.strip() for e in events])
                event_mask_dict[code] = events
            # end with
        # end for
    # end if

    if event_mask_dict:
        print("Loaded {} event masks".format(len(event_mask_dict)))
    # end if

    # Plot all data to PDF file
    fixed_stack_height_inches = 0.8
    y_pad_inches = 1.6
    total_trace_height_inches = paper_size_A4[
        1] - fixed_stack_height_inches - y_pad_inches
    max_trace_height = 0.2

    with PdfPages(output_file) as pdf:
        # Would like to use TeX, but we lack the desktop privileges to install the required packages
        plt.rc('text', usetex=False)
        pbar = tqdm.tqdm(total=len(data_dict))
        network = data_dict.network
        rf_type = data_dict.rotation
        for st in sorted(data_dict.keys()):
            station_db = data_dict[st]

            pbar.update()
            pbar.set_description("{}.{}".format(network, st))

            # Choose RF channel
            channel = rf_util.choose_rf_source_channel(rf_type, station_db)
            channel_data = station_db[channel]
            full_code = '.'.join([network, st, channel])

            t_channel = list(channel)
            t_channel[-1] = 'T'
            t_channel = ''.join(t_channel)

            rf_stream = rf.RFStream(channel_data).sort(['back_azimuth'])
            if event_mask_dict and full_code in event_mask_dict:
                # Select events from external source
                event_mask = event_mask_dict[full_code]
                rf_stream = rf.RFStream([
                    tr for tr in rf_stream if tr.stats.event_id in event_mask
                ]).sort(['back_azimuth'])
            # end if
            if apply_amplitude_filter:
                # Label and filter quality
                rf_util.label_rf_quality_simple_amplitude(rf_type, rf_stream)
                rf_stream = rf.RFStream([
                    tr for tr in rf_stream if tr.stats.predicted_quality == 'a'
                ]).sort(['back_azimuth'])
            # end if
            if apply_similarity_filter:
                rf_stream = rf_util.filter_crosscorr_coeff(rf_stream)
            # end if

            if not rf_stream:
                continue

            # Find matching T-component data
            events = [tr.stats.event_id for tr in rf_stream]
            transverse_data = station_db[t_channel]
            t_stream = rf.RFStream([
                tr for tr in transverse_data if tr.stats.event_id in events
            ]).sort(['back_azimuth'])
            if not t_stream:
                continue

            # Plot pinwheel of primary and transverse components
            fig = rf_plot_utils.plot_rf_wheel([rf_stream, t_stream],
                                              fontscaling=0.8)
            fig.set_size_inches(*paper_size_A4)
            plt.tight_layout()
            plt.subplots_adjust(hspace=0.15, top=0.95, bottom=0.15)
            ax = fig.gca()
            fig.text(-0.32,
                     -0.32,
                     "\n".join(rf_stream[0].stats.processing),
                     fontsize=6,
                     transform=ax.transAxes)
            pdf.savefig(dpi=300, papertype='a4', orientation='portrait')
            plt.close()

            num_traces = len(rf_stream)
            assert len(t_stream) == num_traces

            # Plot RF stack of primary component
            trace_ht = min(total_trace_height_inches / num_traces,
                           max_trace_height)
            fig = rf_plot_utils.plot_rf_stack(
                rf_stream,
                trace_height=trace_ht,
                stack_height=fixed_stack_height_inches,
                fig_width=paper_size_A4[0])
            fig.suptitle("Channel {}".format(rf_stream[0].stats.channel))
            # Customize layout to pack to top of page while preserving RF plots aspect ratios
            _rf_layout_A4(fig)
            # Save to new page in file
            pdf.savefig(dpi=300, papertype='a4', orientation='portrait')
            plt.close()

            # Plot RF stack of transverse component
            fig = rf_plot_utils.plot_rf_stack(
                t_stream,
                trace_height=trace_ht,
                stack_height=fixed_stack_height_inches,
                fig_width=paper_size_A4[0])
            fig.suptitle("Channel {}".format(t_stream[0].stats.channel))
            # Customize layout to pack to top of page while preserving RF plots aspect ratios
            _rf_layout_A4(fig)
            # Save to new page in file
            pdf.savefig(dpi=300, papertype='a4', orientation='portrait')
            plt.close()

            # Plot H-k stack using primary RF component
            fig = _produce_hk_stacking(rf_stream, weighting=hk_weights)
            paper_landscape = (paper_size_A4[1], paper_size_A4[0])
            fig.set_size_inches(*paper_landscape)
            # plt.tight_layout()
            # plt.subplots_adjust(hspace=0.15, top=0.95, bottom=0.15)
            pdf.savefig(dpi=300, papertype='a4', orientation='landscape')
            plt.close()

        # end for
        pbar.close()
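
A minimal sketch of the event mask convention consumed by main() above: each file in event_mask_folder is named '<NET.STA.CHA>_event_mask.txt' and lists one event id per line. The helper, station code and event ids below are hypothetical:

import os

def write_event_mask(folder, full_code, event_ids):
    # File name matches the regex parsed in main() above.
    fname = os.path.join(folder, '{}_event_mask.txt'.format(full_code))
    with open(fname, 'w') as f:
        f.write('\n'.join(str(e) for e in event_ids))

# e.g. write_event_mask('masks', 'OA.BS24.HHR', ['evid_1234', 'evid_5678'])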
Example No. 19
def test_rf_integration():
    import rf

    # Synthesize known RF.
    inclinations = np.array([20.0, 15.0, 10.0])
    distances = np.array([60.0, 70.0, 80.0])
    amplitudes = [1, 0.4, 0.2]
    rf_radial, F_s = _generate_rf_radial(inclinations, distances, amplitudes)

    time_shift = rf_radial[0].stats.onset - rf_radial[0].stats.starttime
    times = rf_radial[0].times() - time_shift

    # Generate synthetic vertical seismic trace.
    g = _generate_synthetic_source(times)

    # Collect test stream data with R and Z components into an RFStream object.
    rf_stream = rf.RFStream()
    np.random.seed(20190925)
    g_noise_scale = 5.0e-3 * np.abs(g).max()
    g_funcs = []
    f_funcs = []
    for i, tr in enumerate(rf_radial):
        # Assign trackable event id
        tr.stats.event_id = i
        # Add some random noise to the source signal
        g_noisy = g + np.random.normal(scale=g_noise_scale, size=g.shape)
        g_funcs.append(g_noisy)
        # Create source RFTrace
        src_tr = tr.copy()
        src_tr.data = g_noisy.copy()
        src_tr.stats.channel = 'HHZ'
        # Synthesize response signal and add some noise
        f = _generate_radial_from_src(tr.data, g_noisy, times)
        f_noise_scale = 5.0e-3 * np.abs(f).max()
        f_noisy = f + np.random.normal(scale=f_noise_scale, size=f.shape)
        f_funcs.append(f_noisy)
        # Create response RFTrace
        rsp_tr = tr.copy()
        rsp_tr.data = f_noisy.copy()
        rf_stream += src_tr
        rf_stream += rsp_tr
    # end for

    # Use the rf library to compute comparison signals using time- and frequency-domain deconvolution
    rf_freq = rf_stream.copy().rf(method='P', rotate=None,
                                  deconvolve='freq').select(component='R')
    try:
        rf_time = rf_stream.copy().rf(method='P',
                                      rotate=None,
                                      deconvolve='time').select(component='R')
    except NameError:
        import warnings
        # If the Toeplitz package is not available on this platform, rf may be unable to perform time-domain deconvolution
        warnings.warn(
            "Unable to test default time-domain deconvolution from rf library")
        rf_time = None
    # end try

    # Call rf generator on rf.RFStream using our custom deconvolution function
    rf_iter = rf_stream.copy().rf(method='P',
                                  rotate=None,
                                  deconvolve='func',
                                  func=rf_iter_deconv,
                                  normalize=0).select(component='R')

    # Perform deconv directly and compare with rf_iter to check that rf calls used our custom function.
    for i, (f, g) in enumerate(zip(f_funcs, g_funcs)):
        x, _, _, _, fit = iter_deconv_pulsetrain(f, g, F_s, time_shift)
        assert np.isclose(100.0, fit, rtol=1e-2)
        # Infer scaling factor due to normalization from max point, and use it to normalize x.
        norm_factor = np.nanmax(x) / np.nanmax(rf_iter[i].data)
        x /= norm_factor
        assert rf_iter[i].stats.event_id == i
        assert np.allclose(rf_iter[i].data, x, rtol=1e-3, atol=5e-3)
    # end for

    # Check that the local maxima of RF peaks found by the different techniques agree.
    # We expect close agreement since the test data is very simple.
    def _local_maxima_mask_1d(arr):
        return (arr[1:-1] > arr[0:-2]) & (arr[1:-1] > arr[2:])

    # end func
    def _rms(arr):
        return np.sqrt(np.mean(np.square(arr)))

    # end func
    for i, tr in enumerate(rf_iter):
        d = tr.data
        # Since wiggles can produce spurious local maxima for certain deconvolution methods (as expected),
        # we only check that the maxima generated by iterative deconvolution are in common with the other
        # techniques, rather than expecting exact matches in the locations of all local maxima.
        expected_mask = _local_maxima_mask_1d(d) & (d[1:-1] > _rms(d))
        mask_idx = np.nonzero(expected_mask)[0]
        d_fd = rf_freq[i].data
        fd_mask = _local_maxima_mask_1d(d_fd) & (d_fd[1:-1] > _rms(d_fd))
        fd_mask_idx = np.nonzero(fd_mask)[0]
        assert np.all(np.isin(mask_idx, fd_mask_idx))
        if rf_time is not None:
            d_td = rf_time[i].data
            td_mask = _local_maxima_mask_1d(d_td) & (d_td[1:-1] > _rms(d_td))
            td_mask_idx = np.nonzero(td_mask)[0]
            assert np.all(np.isin(mask_idx, td_mask_idx))
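
As a quick sanity check of the local-maxima helper used above (toy data, not part of the original test):

import numpy as np

a = np.array([0.0, 1.0, 0.5, 2.0, 0.1])
mask = (a[1:-1] > a[:-2]) & (a[1:-1] > a[2:])
# Interior peaks sit at indices 1 and 3 of a, i.e. offsets 0 and 2 in the mask.
assert np.array_equal(np.nonzero(mask)[0], np.array([0, 2]))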