Example #1
def heatmap_plot(dmat_file,
                 big_tribe,
                 raw_wav_dir,
                 tick_int=20,
                 title=None,
                 show=True):
    mat = 1.0 - np.load(dmat_file)  # More intuitive to use CCC
    # Make list of dates
    big_tribe.sort()
    raw_wav_files = glob('%s/*' % raw_wav_dir)
    raw_wav_files.sort()
    all_wavs = [wav.split('/')[-1].split('.')[0] for wav in raw_wav_files]
    names = [t.name for t in big_tribe if t.name in all_wavs]
    new_tribe = Tribe()
    new_tribe.templates = [temp for temp in big_tribe if temp.name in names]
    times = [
        template.event.origins[-1].time.strftime('%Y-%m-%d')
        for template in new_tribe
    ][::tick_int]
    ax = sns.heatmap(mat,
                     vmin=-0.4,
                     vmax=0.6,
                     cmap='vlag',
                     yticklabels=tick_int,
                     xticklabels=False,
                     cbar_kws={'label': 'CCC'})
    ax.set_yticklabels(times, fontsize=6)
    if title:
        ax.set_title(title)
    plt.tight_layout()
    if show:
        plt.show()
        plt.close()
    return ax
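
A minimal usage sketch; the paths below are placeholders rather than values from the source:

# Hypothetical usage of heatmap_plot; all paths are placeholders.
big_tribe = Tribe().read('tribes/big_tribe.tgz')
ax = heatmap_plot('dist_mat.npy', big_tribe, 'raw_wavs',
                  tick_int=20, title='Inter-template CCC')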
Example #2
def decluster_day_parties(party_dir, tribe_dir, trig_int, min_chan, metric,
                          start, end):
    """

    :param party_dir: Directory housing the Party files from match_filter
    :param trig_int: Minimum separation dist between detections in secs
    :param min_chan: Minimum number of channels used in detection
    :param metric: 'avg_cor' or 'cor_sum'
    :param start: Start UTCDateTime for instance
    :param end: End UTCDateTime for instance.
    :return:
    """
    from glob import glob
    from obspy import UTCDateTime
    from eqcorrscan.core.match_filter import Party, Tribe

    all_parties = glob('%s/*[0-9].tgz' % party_dir)
    party_files = [f for f in all_parties
                   if UTCDateTime(f.split('_')[-2]) > start - 1 and
                   UTCDateTime(f.split('_')[-2]) < end + 1]
    all_files = glob('%s/*' % party_dir)
    party_files.sort()
    num = 0
    print('Reading tribes to memory')
    tribes = [(Tribe().read(tribe_file),
               tribe_file.split('_')[-1].split('.')[0])
              for tribe_file in glob('%s/*' % tribe_dir)]
    print('Starting declustering')
    for i, party_file in enumerate(party_files):
        strt = UTCDateTime()
        print('Processing party %s at %02d:%02d:%02d' % (party_file,
                                                         strt.hour,
                                                         strt.minute,
                                                         strt.second))
        party = Party()
        party.read(party_file)
        for tribe in tribes:
            outfile = '%s_min%02d_%s_%s_declust' % (party_file.split('.')[0],
                                                    min_chan, metric,
                                                    tribe[1])
            if '%s.tgz' % outfile in all_files:
                print('Already wrote %s.tgz' % outfile)
                continue
            print('Working on tribe %s' % tribe[1])
            num += 1
            print('Original Party has length %d' % len(party))
            print('Partitioning into cluster: %s' % tribe[1])
            part_party = partition_party_by_tribe(party, tribe[0])
            print('Enforcing minimum no_chans')
            part_party.min_chans(min_chan)
            print('Declustering')
            try:
                part_party.decluster(trig_int=trig_int, metric=metric)
            except IndexError as msg:
                print('Declustering failed with %s\n' % msg)
                print('Probably no detections')
                continue
            print('Writing party to %s' % outfile)
            part_party.write(outfile)
    return
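
A hypothetical call to the helper above; directory names and dates are placeholders:

# Hypothetical usage; paths, dates and parameter values are placeholders.
from obspy import UTCDateTime

decluster_day_parties('parties', 'tribes', trig_int=5.0, min_chan=5,
                      metric='avg_cor', start=UTCDateTime(2015, 1, 1),
                      end=UTCDateTime(2015, 2, 1))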
Example #3
def Tribe_2_Detector(tribe_dir,
                     raw_wavs,
                     outdir,
                     lowcut,
                     highcut,
                     filt_order,
                     samp_rate,
                     shift_len,
                     reject,
                     dimension,
                     prepick,
                     length,
                     multiplex=False):
    """
    Take a directory of cluster-defined Tribes and write them to Detectors
    :param tribe_dir:
    :return:
    """

    tribe_files = glob('%s/*.tgz' % tribe_dir)
    tribe_files.sort()
    wav_files = glob('%s/*' % raw_wavs)
    for tfile in tribe_files:
        tribe = Tribe().read(tfile)
        print('Working on Tribe: %s' % tfile)
        templates = []
        for temp in tribe:
            try:
                wav = read([
                    wav for wav in wav_files
                    if wav.split('/')[-1].split('.')[0] == temp.name
                ][0])
            except IndexError:
                print('Event not above SNR 1.5')
                continue
            wav.traces = [
                tr.trim(starttime=tr.stats.starttime + 2 - prepick,
                        endtime=tr.stats.starttime + 2 - prepick + length)
                for tr in wav if tr.stats.channel[-1] == 'Z'
            ]
            templates.append(wav)
        # Now construct the detector
        detector = Detector()
        detector.construct(streams=templates,
                           lowcut=lowcut,
                           highcut=highcut,
                           filt_order=filt_order,
                           sampling_rate=samp_rate,
                           multiplex=multiplex,
                           name=tfile.split('/')[-1].split('.')[0],
                           align=True,
                           shift_len=shift_len,
                           reject=reject,
                           no_missed=False)
        detector.write('%s/%s_detector' %
                       (outdir, tfile.split('/')[-1].split('.')[0]))
    return
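
A hedged usage sketch; the filter and subspace values below are illustrative only (note that `dimension` is accepted but never used inside the function):

# Hypothetical usage; all paths and parameter values are placeholders.
Tribe_2_Detector('tribes', 'raw_wavs', 'detectors',
                 lowcut=2.0, highcut=10.0, filt_order=4, samp_rate=50.0,
                 shift_len=0.2, reject=0.3, dimension=9,
                 prepick=0.1, length=4.0)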
Example #4
def make_synthetic_tribe(
    n_templates: int,
    n_channels: int,
    process_length: float,
    template_length: float,
    sampling_rate: float = 100.0,
) -> Tribe:
    """
    Generate a synthetic tribe of templates

    Parameters
    ----------
    n_templates
        Number of templates to generate
    n_channels
        Number of channels for each template
    process_length
        Process length in seconds
    template_length
        Template length in seconds
    sampling_rate
        Sampling rate in Hz

    Returns
    -------
    Tribe of synthetic templates
    """
    nslc = [(random_string(2).upper(), random_string(4).upper(),
             random_string(2).upper(), random_string(3).upper())
            for _ in range(n_channels)]
    tribe = Tribe()
    for i in range(n_templates):
        st = Stream()
        for j in range(n_channels):
            tr = Trace(data=np.random.randn(
                int(sampling_rate * template_length)),
                       header=dict(network=nslc[j][0],
                                   station=nslc[j][1],
                                   location=nslc[j][2],
                                   channel=nslc[j][3],
                                   sampling_rate=sampling_rate))
            st += tr
        name = f"synth_template_{i}"
        tribe.templates.append(
            Template(name=name,
                     st=st,
                     lowcut=2.0,
                     highcut=10.0,
                     samp_rate=sampling_rate,
                     filt_order=4,
                     prepick=0.1,
                     process_length=process_length))
    return tribe
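
A quick sanity check of the generator, assuming `random_string` and the imports used above are in scope:

# Minimal sketch: five synthetic templates on three random channels.
tribe = make_synthetic_tribe(n_templates=5, n_channels=3,
                             process_length=3600.0, template_length=4.0)
assert len(tribe) == 5
assert all(len(t.st) == 3 for t in tribe)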
Example #5
    def get_templates(self, **kwargs) -> Tribe:
        """
        Get template waveforms from the database

        Supports passing a `concurrent.futures.Executor` using the `executor`
        keyword argument for parallel reading.

        {get_event_params}
        """
        paths = self.bank_path + self.read_index(
            columns=["path", "latitude", "longitude"], **kwargs).path
        paths = [path.replace(self.ext, self.template_ext) for path in paths]
        future = self.executor.map(_lazy_template_read, paths)
        return Tribe([t for t in future if t is not None])
Example #6
def catalog_to_templates(catalog):

    tribe = Tribe()
    for hour in range(0, 24):
        pattern = os.path.join(WF_DIR_ROOT, "G*", "*..DP*%s-%s-%s_%02d*2020*" % (year, month, day, hour))
        if not glob(pattern):
            Logger.info("No data found for hour %d" % hour)
            continue
        st_hr = read(pattern)
        st_hr.resample(SAMPLING_RATE)
        st_hr.detrend()

        starttime = st_hr[0].stats.starttime
        endtime = st_hr[0].stats.endtime
        cat_hr = Catalog()
        for ev in catalog:
            if starttime < ev.picks[0].time < endtime:
                cat_hr += ev
        Logger.info("Processing %d detections during hour %d" % (len(cat_hr), hour))

        # For each pick find the channel with highest amplitude
        cat_template = Catalog()
        for iev, ev in enumerate(cat_hr):
            nbad = 0
            for ip, p in enumerate(ev.picks):
                sta = p.waveform_id.station_code
                cut_st = st_hr.select(station=sta).slice(starttime=p.time-0.02, endtime=p.time + 0.1)
                imax = np.argmax(cut_st.max())
                ev.picks[ip].waveform_id.channel_code = cut_st[imax].stats.channel
                ratio = highf_ratio(data=cut_st[imax].data)
                if ratio > 0.75:
                    nbad += 1
            if nbad > 3:
                Logger.info("Removing event with ratio of high frequencies = %f" % ratio)
            elif len(ev.picks) > 4:
                cat_template += ev

        # Make a tribe for this hour
        if len(cat_template) > 0:
            tribe_hour = construct_tribe(catalog=cat_template, stream=st_hr)
            tribe += tribe_hour
            Logger.info("Adding: %s" % tribe_hour)

    Logger.info("Initial tribe for this whole day: %s" % tribe)
    return tribe
Example #7
def _download_and_make_template(
    event: Event,
    client,
    download_data_len: float,
    path_structure: str,
    bank_path: str,
    template_name_structure: str,
    save_raw: bool,
    **kwargs,
) -> Template:
    """ Make the template using downloaded data"""
    Logger.debug("Making template for event {0}".format(event.resource_id))
    _process_len = kwargs.pop("process_len", download_data_len)
    if _process_len > download_data_len:
        Logger.info(
            "Downloading {0}s of data as required by process len".format(
                _process_len))
        download_data_len = _process_len
    st = _get_data_for_event(event=event,
                             client=client,
                             download_data_len=download_data_len,
                             path_structure=path_structure,
                             bank_path=bank_path,
                             template_name_structure=template_name_structure,
                             save_raw=save_raw)
    Logger.debug("Downloaded {0} traces for event {1}".format(
        len(st), event.resource_id))
    tribe = Tribe().construct(method="from_meta_file",
                              meta_file=Catalog([event]),
                              st=st,
                              process_len=download_data_len,
                              **kwargs)
    try:
        template = tribe[0]
        Logger.info("Made template: {0}".format(template))
    except IndexError as e:
        Logger.error(e)
        return None
    template.name = event.resource_id.id.split('/')[-1]
    # Edit comment to reflect new template_name
    for comment in template.event.comments:
        if comment.text.startswith("eqcorrscan_template_"):
            comment.text = "eqcorrscan_template_{0}".format(template.name)
    return template
Example #8
    if glob(pattern_hawk):
        ndt = 1 / 250.0
        wf_len_s = int(8.0 / ndt) * ndt
        prepick = 4.0
        detst += read(pattern_hawk,
                      starttime=tmin - prepick,
                      endtime=tmin - prepick + wf_len_s)
    detst.detrend("demean")
    return detst


if __name__ == "__main__":

    daystr = sys.argv[1]
    print("************ %s ************" % daystr)
    tribe = Tribe().read(os.path.join(party_dir, "tribe_day%s.tgz" % daystr))
    print(tribe)
    if not tribe.templates:
        print("no templates...")
        sys.exit()

    for template in tribe:
        if not template:
            continue
        fname = os.path.join(out_dir, "%s_full_template.mseed" % template.name)
        if os.path.exists(fname):
            continue
        num_p_picks = len(
            [p for p in template.event.picks if p.phase_hint == "P"])
        has_magn = len(template.event.magnitudes) > 0
        if len(template.st) > 5 and (num_p_picks > 0 or has_magn):
            template.st.write(fname, format="MSEED")
Example #9
def check_tribe_quality(tribe: Tribe,
                        seed_ids: set = None,
                        min_stations: int = None,
                        lowcut: float = None,
                        highcut: float = None,
                        filt_order: int = None,
                        samp_rate: float = None,
                        process_len: float = None,
                        *args,
                        **kwargs) -> Tribe:
    """
    Check that templates in the tribe have channels all the same length.

    Parameters
    ----------
    tribe
        A Tribe to check the quality of.
    seed_ids
        seed-ids of channels to be included in the templates - if None,
        then all channels will be included
    min_stations
        Minimum number of stations for a template to be included.
    lowcut
        Desired template processing lowcut in Hz, if None, will not check
    highcut
        Desired template processing highcut in Hz, if None, will not check
    filt_order
        Desired template filter order, if None, will not check
    samp_rate
        Desired template sampling rate in Hz, if None, will not check
    process_len
        Desired template processing length in s, if None, will not check

    Returns
    -------
    A filtered tribe.
    """
    processing_keys = dict(lowcut=lowcut,
                           highcut=highcut,
                           filt_order=filt_order,
                           samp_rate=samp_rate,
                           process_length=process_len)
    Logger.info("Checking processing parameters: {0}".format(processing_keys))
    min_stations = min_stations or 0
    _templates = []
    # Perform length check
    for template in tribe:
        counted_lengths = Counter([tr.stats.npts for tr in template.st])
        if len(counted_lengths) > 1:
            Logger.warning(
                "Multiple lengths found in template, using most common"
                " ({0})".format(counted_lengths.most_common(1)[0][0]))
            _template = template.copy()
            _template.st = Stream()
            for tr in template.st:
                if tr.stats.npts == counted_lengths.most_common(1)[0][0]:
                    _template.st += tr
            _templates.append(_template)
        else:
            _templates.append(template)
    templates = _templates

    # Check processing parameters
    _templates = []
    for template in templates:
        for processing_key, processing_value in processing_keys.items():
            if processing_value and template.__dict__[
                    processing_key] != processing_value:
                Logger.warning("Template {0}: {1} does not match {2}".format(
                    processing_key, template.__dict__[processing_key],
                    processing_value))
                break
        else:
            _templates.append(template)
    templates = _templates

    # Perform station check
    if seed_ids is None:
        seed_ids = {tr.id for template in tribe for tr in template.st}
    for template in templates:
        _st = Stream()
        for tr in template.st:
            if tr.id in seed_ids:
                _st += tr
        template.st = _st
    return Tribe([
        t for t in templates if len({tr.stats.station
                                     for tr in t.st}) > min_stations
    ])
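
Note that the final filter keeps templates with strictly more than min_stations stations. A hedged usage sketch with illustrative values:

# Hypothetical usage; processing values are placeholders.
clean_tribe = check_tribe_quality(
    tribe, min_stations=5, lowcut=2.0, highcut=10.0,
    filt_order=4, samp_rate=50.0, process_len=3600.0)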
Example #10
    def make_templates(
        self,
        catalog: Catalog,
        stream: Stream = None,
        client=None,
        download_data_len: float = 90,
        save_raw: bool = True,
        update_index: bool = False,
        **kwargs,
    ) -> Tribe:
        """
        Make templates from data or client based on a given catalog.

        Templates will be put in the database. Requires either a stream or
        a suitable client with a get_waveforms method.

        Parameters
        ----------
        catalog
            Catalog of events to generate templates from.
        stream
            Optional: Stream encompassing the events in the catalog
        client
            Optional: Client with at-least a `get_waveforms` method, ideally
            the client should make the data for the events in catalog
            available.
        download_data_len
            If client is given this is the length of data to download. The
            raw continuous data will also be saved to disk to allow later
            processing if save_raw=True
        save_raw
            Whether to store raw data on disk as well - defaults to True.
        update_index
            Flag to indicate whether or not to update the event index after
            writing the new events. Default is False.
        kwargs
            Keyword arguments supported by EQcorrscan's `Template.construct`
            method. Requires at least:
              - lowcut
              - highcut
              - samp_rate
              - filt_order
              - prepick
              - length
              - swin

        Returns
        -------
        Tribe of templates
        """
        assert client or stream, "Needs either client or stream"
        if stream is not None:
            tribe = Tribe().construct(method="from_meta_file",
                                      meta_file=catalog,
                                      st=stream,
                                      **kwargs)
        else:
            Logger.debug("Making templates")
            inner_download_and_make_template = partial(
                _download_and_make_template,
                client=client,
                download_data_len=download_data_len,
                path_structure=self.path_structure,
                bank_path=self.bank_path,
                template_name_structure=self.template_name_structure,
                save_raw=save_raw,
                **kwargs)
            template_iterable = self.executor.map(
                inner_download_and_make_template, catalog)
            tribe = Tribe([t for t in template_iterable if t is not None])
        self.put_templates(tribe, update_index=update_index)
        return tribe
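
A hypothetical call, assuming `bank` is an instance of the class this method belongs to and `cat`/`client` already exist; the kwargs follow the Template.construct requirements listed in the docstring:

# Hypothetical usage; `bank`, `cat` and `client` are assumed to exist.
tribe = bank.make_templates(cat, client=client, download_data_len=300.0,
                            lowcut=2.0, highcut=10.0, samp_rate=50.0,
                            filt_order=4, prepick=0.5, length=3.0,
                            swin='all')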
Example #11
def _download_and_make_template(
    event: Event,
    client,
    download_data_len: float,
    path_structure: str,
    bank_path: str,
    template_name_structure: str,
    save_raw: bool,
    rebuild: bool,
    **kwargs,
) -> Template:
    """ Make the template using downloaded data"""
    Logger.debug("Making template for event {0}".format(event.resource_id))
    if not rebuild:
        try:
            path = _summarize_template(
                event=event, path_struct=path_structure,
                name_struct=template_name_structure)["path"]
        except ValueError as e:
            Logger.error(f"Could not summarize event due to {e}")
            return None
        ppath = (Path(bank_path) / path).absolute()
        ppath.parent.mkdir(parents=True, exist_ok=True)
        output_path = str(ppath)
        if os.path.isfile(output_path):
            Logger.debug("Template exists and rebuild=False, skipping")
            return read_template(output_path)
    # Sanitize event - sometimes Arrivals are not linked to picks.
    pick_dict = {p.resource_id.id: p for p in event.picks}
    for origin in event.origins:
        origin.arrivals = [
            arr for arr in origin.arrivals 
            if arr.pick_id in pick_dict]
    _process_len = kwargs.pop("process_len", download_data_len)
    if _process_len > download_data_len:
        Logger.info(
            "Downloading {0}s of data as required by process len".format(
                _process_len))
        download_data_len = _process_len
    st = _get_data_for_event(
        event=event, client=client,
        download_data_len=download_data_len, path_structure=path_structure,
        bank_path=bank_path, template_name_structure=template_name_structure,
        save_raw=save_raw)
    if st is None:
        return None
    Logger.debug("Downloaded {0} traces for event {1}".format(
        len(st), event.resource_id))
    try:
        tribe = Tribe().construct(
            method="from_meta_file", meta_file=Catalog([event]), st=st,
            process_len=download_data_len, **kwargs)
    except Exception as e:
        Logger.error(e)
        return None
    try:
        template = tribe[0]
        Logger.info("Made template: {0}".format(template))
    except IndexError as e:
        Logger.error(e)
        return None
    template.name = event.resource_id.id.split('/')[-1]
    # Edit comment to reflect new template_name
    for comment in template.event.comments:
        if comment.text and comment.text.startswith("eqcorrscan_template_"):
            comment.text = "eqcorrscan_template_{0}".format(template.name)
    return template
Example #12
def construct_tribe(catalog, stream):
    # parameters for templates
    ndt = 1 / SAMPLING_RATE
    prepick = int(0.05 / ndt) * ndt # this ensures all templates have same length
    length = int(0.5 / ndt) * ndt  # this ensures all templates have same length
    lowcut = None
    highcut = 60.0
    filt_order = 3
    process_len = stream[0].stats.endtime - stream[0].stats.starttime
    min_snr = 3
    num_cores = cpu_count()
    templates, catalog, process_lengths = custom_template_gen(method='from_meta_file',
                                                              meta_file=catalog,
                                                              process_len=process_len,
                                                              st=stream,
                                                              lowcut=lowcut,
                                                              highcut=highcut,
                                                              samp_rate=SAMPLING_RATE,
                                                              filt_order=filt_order,
                                                              length=length,
                                                              prepick=prepick,
                                                              swin='all',
                                                              all_horiz=False,
                                                              min_snr=min_snr,
                                                              num_cores=num_cores,
                                                              ignore_bad_data=True,
                                                              plot=False, return_event=True)

    from eqcorrscan.core.match_filter import Template
    from obspy.core.event import Comment, CreationInfo
    template_list = []
    for template, event, process_len in zip(templates, catalog,
                                            process_lengths):
        if len(template) < 4:
            Logger.warning("Less than 4 traces for this template. Skipping. Number of picks was %d" % len(event.picks))
            continue
        t = Template()
        for tr in template:
            if not np.any(tr.data.astype(np.float16)):
                Logger.warning('Data are zero in float16, missing data,'
                               ' will not use: {0}'.format(tr.id))
                template.remove(tr)
        if len(template) == 0:
            Logger.error('Empty template. Skipping')
            continue        
            
        # Check if traces have same length
        lengths = set([tr.stats.endtime - tr.stats.starttime for tr in template])
        if len(lengths) > 1:
            print("Traces don't have same lengths. Fixing")
            wf_len = list(lengths)[0]
            for tr in template:        
                tr.trim(starttime=tr.stats.starttime, endtime=tr.stats.starttime + wf_len, fill_value=0, pad=True)        
            
        t.st = template
        t.name = template.sort(['starttime'])[0]. \
            stats.starttime.strftime('%Y_%m_%dt%H_%M_%S')
        t.lowcut = lowcut
        t.highcut = highcut
        t.filt_order = filt_order
        t.samp_rate = SAMPLING_RATE
        t.process_length = process_len
        t.prepick = prepick
        event.comments.append(Comment(
            text="eqcorrscan_template_" + t.name))
        t.event = event
        template_list.append(t)
    tribe = Tribe(templates=template_list)

    return tribe
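
The int(x / ndt) * ndt pattern in construct_tribe snaps window lengths onto the sample grid so that every template ends up with the same number of points. A worked example, assuming SAMPLING_RATE is 250 Hz:

# Sample-grid rounding; the 250 Hz rate is an assumption for illustration.
ndt = 1 / 250.0                  # 0.004 s per sample
length = int(0.5 / ndt) * ndt    # 125 samples -> exactly 0.5 s
prepick = int(0.05 / ndt) * ndt  # 12 samples -> 0.048 s, not 0.05 s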
Example #13
def cluster_tribe(tribe,
                  raw_wav_dir,
                  lowcut,
                  highcut,
                  samp_rate,
                  filt_order,
                  pre_pick,
                  length,
                  shift_len,
                  corr_thresh,
                  cores,
                  dist_mat=False,
                  show=False):
    """
    Cross correlate all templates in a tribe and return separate tribes for
    each cluster
    :param tribe: Tribe to cluster
    :return:

    .. note:: Functionality here is pillaged from align_design as we don't
        want the multiplexed portion of that function.
    """

    tribe.sort()
    raw_wav_files = glob('%s/*' % raw_wav_dir)
    raw_wav_files.sort()
    all_wavs = [wav.split('/')[-1].split('.')[0] for wav in raw_wav_files]
    names = [t.name for t in tribe if t.name in all_wavs]
    wavs = [
        wav for wav in raw_wav_files
        if wav.split('/')[-1].split('.')[0] in names
    ]
    new_tribe = Tribe()
    new_tribe.templates = [temp for temp in tribe if temp.name in names]
    print('Processing temps')
    temp_list = [(shortproc(read(tmp),
                            lowcut=lowcut,
                            highcut=highcut,
                            samp_rate=samp_rate,
                            filt_order=filt_order,
                            parallel=True,
                            num_cores=cores), template)
                 for tmp, template in zip(wavs, new_tribe)]
    print('Clipping traces')
    for temp in temp_list:
        print('Clipping template %s' % temp[1].name)
        for tr in temp[0]:
            pk = [
                pk for pk in temp[1].event.picks
                if pk.waveform_id.station_code == tr.stats.station
                and pk.waveform_id.channel_code == tr.stats.channel
            ][0]
            tr.trim(starttime=pk.time - shift_len - pre_pick,
                    endtime=pk.time - pre_pick + length + shift_len)
    trace_lengths = [
        tr.stats.endtime - tr.stats.starttime for st in temp_list
        for tr in st[0]
    ]
    clip_len = min(trace_lengths) - (2 * shift_len)
    stachans = list(
        set([(tr.stats.station, tr.stats.channel) for st in temp_list
             for tr in st[0]]))
    print('Aligning traces')
    for stachan in stachans:
        trace_list = []
        trace_ids = []
        for i, st in enumerate(temp_list):
            tr = st[0].select(station=stachan[0], channel=stachan[1])
            if len(tr) > 0:
                trace_list.append(tr[0])
                trace_ids.append(i)
            if len(tr) > 1:
                warnings.warn('Too many matches for %s %s' %
                              (stachan[0], stachan[1]))
        shift_len_samples = int(shift_len * trace_list[0].stats.sampling_rate)
        shifts, cccs = stacking.align_traces(trace_list=trace_list,
                                             shift_len=shift_len_samples,
                                             positive=True)
        for i, shift in enumerate(shifts):
            st = temp_list[trace_ids[i]][0]
            start_t = st.select(station=stachan[0],
                                channel=stachan[1])[0].stats.starttime
            start_t += shift_len
            start_t -= shift
            st.select(station=stachan[0],
                      channel=stachan[1])[0].trim(start_t, start_t + clip_len)
    print('Clustering')
    if isinstance(dist_mat, np.ndarray):
        groups = cluster_from_dist_mat(dist_mat=dist_mat,
                                       temp_list=temp_list,
                                       show=show,
                                       corr_thresh=corr_thresh)
    else:
        groups = clustering.cluster(temp_list,
                                    show=show,
                                    corr_thresh=corr_thresh,
                                    allow_shift=False,
                                    save_corrmat=True,
                                    cores=cores)
    group_tribes = []
    for group in groups:
        group_tribes.append(
            Tribe(templates=[
                Template(st=tmp[0],
                         name=tmp[1].name,
                         event=tmp[1].event,
                         highcut=highcut,
                         lowcut=lowcut,
                         samp_rate=samp_rate,
                         filt_order=filt_order,
                         prepick=pre_pick) for tmp in group
            ]))
    return group_tribes
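
A hedged usage sketch; parameter values are illustrative, and each returned Tribe can be written out as its own tarball:

# Hypothetical usage; paths and values are placeholders.
group_tribes = cluster_tribe(tribe, 'raw_wavs', lowcut=2.0, highcut=10.0,
                             samp_rate=50.0, filt_order=4, pre_pick=0.5,
                             length=3.0, shift_len=0.2, corr_thresh=0.4,
                             cores=4)
for i, group_tribe in enumerate(group_tribes):
    group_tribe.write('tribe_group_%d' % i)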
Example #14
    #     tmin = origt - 2.0
    #     tmax = origt + 6.0
    #     stations = list(set([p.waveform_id.station_code for p in event.picks]))
    #     print(stations)
    #     stream = get_stream_hour(tmin, tmax, stations)

    #     stream.write(stream_fname, format="MSEED")

    #     if event.origins[0].depth < 50:
    #         stream.write(os.path.join("templates_less50m", "%s.mseed" % name), format="MSEED")

    for f in templist:
        if "tribe" in f:
            continue
        print(f)
        tribe = Tribe().read(f)
        template = tribe[0]
        event = template.event
        depth = event.origins[0].depth
        if depth < 10:
            origt = event.origins[0].time
            evlat = event.origins[0].latitude
            evlon = event.origins[0].longitude

            tmin = origt - 0.5
            tmax = origt + 5.0
            name = template.name
            fname = os.path.join("templates", "waveforms", "%s.mseed" % name)
            stream = read(fname)
            for tr in stream:
                station = tr.stats.station
Example #15
def cluster_cat(catalog,
                corr_thresh,
                corr_params=None,
                raw_wav_dir=None,
                dist_mat=False,
                out_cat=None,
                show=False,
                method='average'):
    """
    Cross correlate all templates in a tribe and return separate tribes for
    each cluster
    :param catalog: Catalog of events to cluster
    :param corr_thresh: Correlation threshold for clustering
    :param corr_params: Dictionary of filter parameters. Must include keys:
        lowcut, highcut, samp_rate, filt_order, pre_pick, length, shift_len,
        cores
    :param raw_wav_dir: Directory of waveforms to take from
    :param dist_mat: If there's a precomputed distance matrix, use this
        instead of doing all the correlations
    :param out_cat: Output catalog corresponding to the events
    :param show: Show the dendrogram? Careful as this can exceed max recursion
    :param method: Linkage method for the clustering (e.g. 'average')
    :return:

    .. note:: Functionality here is pillaged from align_design as we don't
        want the multiplexed portion of that function.
    """

    if corr_params and raw_wav_dir:
        shift_len = corr_params['shift_len']
        lowcut = corr_params['lowcut']
        highcut = corr_params['highcut']
        samp_rate = corr_params['samp_rate']
        filt_order = corr_params['filt_order']
        pre_pick = corr_params['pre_pick']
        length = corr_params['length']
        cores = corr_params['cores']
        raw_wav_files = glob('%s/*' % raw_wav_dir)
        raw_wav_files.sort()
        all_wavs = [wav.split('/')[-1].split('_')[-3] for wav in raw_wav_files]
        print(all_wavs[0])
        names = [
            ev.resource_id.id.split('/')[-1] for ev in catalog
            if ev.resource_id.id.split('/')[-1] in all_wavs
        ]
        print(names[0])
        wavs = [
            wav for wav in raw_wav_files
            if wav.split('/')[-1].split('_')[-3] in names
        ]
        print(wavs[0])
        new_cat = Catalog(events=[
            ev for ev in catalog if ev.resource_id.id.split('/')[-1] in names
        ])
        print('Processing temps')
        temp_list = [(shortproc(read('{}/*'.format(tmp)),
                                lowcut=lowcut,
                                highcut=highcut,
                                samp_rate=samp_rate,
                                filt_order=filt_order,
                                parallel=True,
                                num_cores=cores),
                      ev.resource_id.id.split('/')[-1])
                     for tmp, ev in zip(wavs, new_cat)]
        print('Clipping traces')
        rm_temps = []
        rm_ev = []  # Events to remove along with their empty templates
        for i, temp in enumerate(temp_list):
            print('Clipping template %s' % new_cat[i].resource_id.id)
            rm_ts = []  # Make a list of traces with no pick to remove
            for tr in temp[0]:
                pk = [
                    pk for pk in new_cat[i].picks
                    if pk.waveform_id.station_code == tr.stats.station
                    and pk.waveform_id.channel_code == tr.stats.channel
                ]
                if len(pk) == 0:
                    rm_ts.append(tr)
                else:
                    tr.trim(starttime=pk[0].time - shift_len - pre_pick,
                            endtime=pk[0].time - pre_pick + length + shift_len)
            # Remove pickless traces
            for rm in rm_ts:
                temp[0].traces.remove(rm)
            # If trace lengths are internally inconsistent, remove template
            if len(list(set([len(tr) for tr in temp[0]]))) > 1:
                rm_temps.append(temp)
            # If template is now length 0, remove it and associated event
            if len(temp[0]) == 0:
                rm_temps.append(temp)
                rm_ev.append(new_cat[i])
        for t in rm_temps:
            temp_list.remove(t)
        # Remove the corresponding events as well so catalog and distmat
        # are the same shape
        for rme in rm_ev:
            new_cat.events.remove(rme)
        print(new_cat)
        if out_cat:
            new_cat.write(out_cat, format="QUAKEML")
    print('Clustering')
    if isinstance(dist_mat, np.ndarray):
        print('Assuming the tribe provided is the same shape as dist_mat')
        # Dummy streams
        temp_list = [(Stream(), ev) for ev in catalog]
        groups = cluster_from_dist_mat(dist_mat=dist_mat,
                                       temp_list=temp_list,
                                       show=show,
                                       corr_thresh=corr_thresh,
                                       method=method)
    else:
        groups = clustering.cluster(temp_list,
                                    show=show,
                                    corr_thresh=corr_thresh,
                                    shift_len=shift_len * 2,
                                    save_corrmat=True,
                                    cores=cores)
    group_tribes = []
    group_cats = []
    if corr_params:
        for group in groups:
            group_tribes.append(
                Tribe(templates=[
                    Template(st=tmp[0],
                             name=tmp[1].resource_id.id.split('/')[-1],
                             event=tmp[1],
                             highcut=highcut,
                             lowcut=lowcut,
                             samp_rate=samp_rate,
                             filt_order=filt_order,
                             prepick=pre_pick) for tmp in group
                ]))
            group_cats.append(Catalog(events=[tmp[1] for tmp in group]))
    else:
        for group in groups:
            group_tribes.append(
                Tribe(templates=[
                    Template(st=tmp[0],
                             name=tmp[1].resource_id.id.split('/')[-1],
                             event=tmp[1].event,
                             highcut=None,
                             lowcut=None,
                             samp_rate=None,
                             filt_order=None,
                             prepick=None) for tmp in group
                ]))
            group_cats.append(Catalog(events=[tmp[1] for tmp in group]))
    return group_tribes, group_cats
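
A hypothetical call; the corr_params keys follow the docstring above and all paths are placeholders:

# Hypothetical usage; values and paths are placeholders.
corr_params = dict(lowcut=2.0, highcut=10.0, samp_rate=50.0, filt_order=4,
                   pre_pick=0.5, length=3.0, shift_len=0.2, cores=4)
group_tribes, group_cats = cluster_cat(catalog, corr_thresh=0.4,
                                       corr_params=corr_params,
                                       raw_wav_dir='raw_wavs',
                                       out_cat='clustered_cat.xml')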
Example #16
    split_dates = partition(all_dates, splits)
    # Determine date range
    try:
        inst_dats = split_dates[instance]
    except IndexError:
        print('Instance no longer needed. Downsize --splits for this job')
        sys.exit()
    inst_start = min(inst_dats)
    inst_end = max(inst_dats)
    print('This instance will run from %s to %s' %
          (inst_start.strftime('%Y/%m/%d'), inst_end.strftime('%Y/%m/%d')))
else:
    inst_dats = all_dates
tribe_rd_strt = timer()
# Reading tribe
tribe = Tribe().read(
    '/projects/nesi00228/data/templates/12-15/Tribe_12-15_P_nodups.tgz')
print('Reading Tribe tarball took %s seconds' % (timer() - tribe_rd_strt))
# Extract the station info from the templates
stachans = {tr.stats.station: [] for temp in tribe for tr in temp.st}
for temp in tribe:
    for tr in temp.st:
        # Don't hard code vertical channels!!
        chan_code = 'EH' + tr.stats.channel[-1]
        if chan_code not in stachans[tr.stats.station]:
            stachans[tr.stats.station].append(chan_code)
# Specify locations of waveform files
wav_dirs = ['/projects/nesi00228/data/miniseed/']
inst_partay = Party()
for day in inst_dats:
    dto = UTCDateTime(day)
    wav_read_start = timer()
Example #17
    def test_real_time_plotting(self):
        """Test the real-time plotter - must be run interactively."""

        seed_list = [
            "NZ.INZ.10.HHZ", "NZ.JCZ.10.HHZ", "NZ.FOZ.11.HHZ", "NZ.MSZ.10.HHZ",
            "NZ.PYZ.10.HHZ", "NZ.DCZ.10.HHZ", "NZ.WVZ.10.HHZ"
        ]
        client = Client("GEONET")
        inv = client.get_stations(network=seed_list[0].split(".")[0],
                                  station=seed_list[0].split(".")[1],
                                  location=seed_list[0].split(".")[2],
                                  channel=seed_list[0].split(".")[3])
        for seed_id in seed_list[1:]:
            net, sta, loc, chan = seed_id.split('.')
            inv += client.get_stations(network=net,
                                       station=sta,
                                       channel=chan,
                                       location=loc)

        now = UTCDateTime.now()
        template_cat = client.get_events(starttime=now - 3600, endtime=now)
        tribe = Tribe(templates=[
            Template(event=event, name=event.resource_id.id.split("/")[-1])
            for event in template_cat
        ])
        template_names = cycle([t.name for t in tribe])

        buffer_capacity = 1200
        rt_client = RealTimeClient(server_url="link.geonet.org.nz",
                                   buffer_capacity=buffer_capacity)
        for seed_id in seed_list:
            net, station, _, selector = seed_id.split(".")
            rt_client.select_stream(net=net,
                                    station=station,
                                    selector=selector)

        rt_client.background_run()
        while len(rt_client.buffer) < 7:
            # Wait until we have some data
            time.sleep(SLEEP_STEP)

        detections = []
        plotter = EQcorrscanPlot(rt_client=rt_client,
                                 plot_length=60,
                                 tribe=tribe,
                                 inventory=inv,
                                 update_interval=1000,
                                 detections=detections)
        plotter.background_run()

        duration = 0
        step = 5
        while duration < MAX_DURATION:
            detections.append(
                Detection(
                    template_name=next(template_names),
                    detect_time=UTCDateTime.now(),
                    no_chans=999,
                    detect_val=999,
                    threshold=999,
                    threshold_type="MAD",
                    threshold_input=999,
                    typeofdet="unreal",
                    event=Event(picks=[
                        Pick(time=UTCDateTime.now(),
                             waveform_id=WaveformStreamID(seed_string=seed_id))
                        for seed_id in seed_list
                    ])))
            time.sleep(step)
            duration += step
        rt_client.background_stop()
Example #18
def cluster_map_plot(dmat_file,
                     big_tribe,
                     tribe_groups_dir,
                     raw_wav_dir,
                     savefig=None):
    """
    Wrapper on seaborn.clustermap to allow for coloring of rows/columns
    by multiplet
    :param dmat_file:
    :param big_tribe:
    :param tribe_groups_dir:
    :return:
    """
    # Make list of temp files which were actually used in the clustering
    # There were actually fewer of these than templates for some reason...?
    # XXX TODO May be worth using SAC directories instead?
    big_tribe.sort()
    raw_wav_files = glob('%s/*' % raw_wav_dir)
    raw_wav_files.sort()
    all_wavs = [wav.split('/')[-1].split('.')[0] for wav in raw_wav_files]
    names = [t.name for t in big_tribe if t.name in all_wavs]
    wavs = [
        wav for wav in raw_wav_files
        if wav.split('/')[-1].split('.')[0] in names
    ]
    new_tribe = Tribe()
    new_tribe.templates = [temp for temp in big_tribe if temp.name in names]
    print('Processing temps')
    temp_list = [template.name for tmp, template in zip(wavs, new_tribe)]
    matrix = np.load(dmat_file)  # Take absolute value? NO
    dist_vec = squareform(matrix)
    Z = linkage(dist_vec)
    df_mat = pd.DataFrame(matrix)
    tribes = glob('{}/*.tgz'.format(tribe_groups_dir))
    grp_inds = []
    grp_nos = []
    for tribe in tribes:
        grp_nos.append(tribe.split('_')[-2])
        trb = Tribe().read(tribe)
        names = [temp.name for temp in trb]
        inds = []
        for i, nm in enumerate(temp_list):
            if nm in names:
                inds.append(i)
        grp_inds.append(tuple(inds))
    # Create a categorical palette to identify the networks
    multiplet_pal = sns.hls_palette(len(grp_inds))
    multiplet_lut = dict(zip(tuple(grp_inds), multiplet_pal))
    # Convert the palette to vectors that will be drawn on the side of the matrix
    temp_colors = {}
    temp_inds = np.arange(0, len(temp_list), 1)
    for i in temp_inds:
        for key in multiplet_lut.keys():
            if i in key:
                temp_colors[i] = multiplet_lut[key]
                break
    template_colors = pd.Series(temp_inds, index=temp_inds,
                                name='Multiplet').map(temp_colors)
    cmg = sns.clustermap(
        df_mat,
        method='single',
        cmap='vlag_r',
        vmin=0.4,
        vmax=1.4,  #row_colors=template_colors,
        col_colors=template_colors,
        row_linkage=Z,
        col_linkage=Z,
        yticklabels=False,
        xticklabels=False,
        cbar_kws={'label': '1 - CCC'},
        figsize=(12, 12))
    if not savefig:
        plt.show()
    else:
        cmg.savefig(savefig, dpi=500)
    return cmg
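
A hypothetical call mirroring the other plotting helpers above; paths are placeholders:

# Hypothetical usage; paths are placeholders.
cmg = cluster_map_plot('dist_mat.npy', big_tribe, 'tribe_groups',
                       'raw_wavs', savefig='clustermap.png')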
Example #19
    for s in stat:
        for c in chans:
            st += makeTemplates(path, s, c, tempLims, freq)
    templateObj = Template(name=t.stats.starttime.isoformat()[-6:] +
                           s.lower() + c.lower(),
                           st=st,
                           lowcut=freq[0],
                           highcut=freq[1],
                           samp_rate=t.stats.sampling_rate,
                           filt_order=4,
                           prepick=0)
    templateList.append(st)
    templates.append(templateObj)
    template_names.append(t.stats.starttime.isoformat()[-6:] + s.lower() +
                          c.lower())
templates = Tribe(templates=templates)

# extract year string
tempYear = tempLims[0].year
tempYearStr = str(tempYear)

# extract month string and pad with zeros if necessary
tempMonth = tempLims[0].month
tempMonthStr = str(tempMonth).zfill(2)

# extract day string and pad with zeros if necessary
tempDay = tempLims[0].day
tempDayStr = str(tempDay).zfill(2)
Example #20
    # Determine date range
    try:
        inst_dats = split_dates[instance]
    except IndexError:
        print('Instance no longer needed. Downsize --splits for this job')
        sys.exit()
    inst_start = min(inst_dats)
    inst_end = max(inst_dats)
    print('This instance will run from %s to %s'
          % (inst_start.strftime('%Y/%m/%d'),
             inst_end.strftime('%Y/%m/%d')))
else:
    inst_dats = all_dates

# Reading tribe
tribe = Tribe().read(tribe_file)

party = Party()
net_sta_loc_chans = list(set([(pk.waveform_id.network_code,
                               pk.waveform_id.station_code,
                               pk.waveform_id.location_code,
                               pk.waveform_id.channel_code)
                              for temp in tribe
                              for pk in temp.event.picks]))
for date in date_generator(inst_dats[0], inst_dats[-1]):
    dto = UTCDateTime(date)
    jday = dto.julday
    print('Running {}\nJday: {}'.format(dto, jday))
    wav_files = ['{}/{}/{}/{}/{}/{}.{}.{}.{}.{}.{:03d}.ms'.format(
        wav_dir, date.year, nslc[0], nslc[1], nslc[3], nslc[0], nslc[1],
        nslc[2], nslc[3], date.year, jday) for nslc in net_sta_loc_chans]
    # Run detections hour by hour and collect them for the day
    parties = Party()
    for hour in range(0, 24):
        pattern = os.path.join(WF_DIR_ROOT, "G*", "*..DP*%s-%s-%s_%02d*2020*" % (year, month, day, hour))
        if not glob(pattern):
            Logger.info("No data found for hour %d" % hour)
            continue
        st = read(pattern)
        st.resample(SAMPLING_RATE)
        st.detrend()
        party = tribe.detect(stream=st, threshold=threshold, threshold_type=threshold_type, trig_int=trig_int,
                             plot=False, daylong=False, ignore_bad_data=True,
                             parallel_process=True, cores=cpu_count(), concurrency="multiprocess",
                             group_size=20, overlap="calculate")
        party.min_chans(min_chans)
        # party.decluster(trig_int=trig_int)
        party = party.filter(dates=[st[0].stats.starttime, st[0].stats.endtime], min_dets=2)
        parties += party

    # Save party
    template_list = [f.template for f in parties if f]
    
    if len(template_list) > 0:  
        party_fname = os.path.join(OUTPUT_DIR, "party_day%s.tgz" % daystr)
        Logger.info("Saving final party to: %s" % party_fname)
        parties.write(filename=party_fname)
        # Save tribe
        final_tribe = Tribe(templates=template_list)
        tribe_fname = os.path.join(OUTPUT_DIR, "tribe_day%s.tgz" % daystr)
        Logger.info("Saving final tribe to: %s" % tribe_fname)    
        final_tribe.write(filename=tribe_fname)