def heatmap_plot(dmat_file, big_tribe, raw_wav_dir, tick_int=20, title=None,
                 show=True):
    mat = 1.0 - np.load(dmat_file)  # More intuitive to use CCC
    # Make list of dates
    big_tribe.sort()
    raw_wav_files = glob('%s/*' % raw_wav_dir)
    raw_wav_files.sort()
    all_wavs = [wav.split('/')[-1].split('.')[0] for wav in raw_wav_files]
    names = [t.name for t in big_tribe if t.name in all_wavs]
    new_tribe = Tribe()
    new_tribe.templates = [temp for temp in big_tribe if temp.name in names]
    times = [template.event.origins[-1].time.strftime('%Y-%m-%d')
             for template in new_tribe][::tick_int]
    ax = sns.heatmap(mat, vmin=-0.4, vmax=0.6, cmap='vlag',
                     yticklabels=tick_int, xticklabels=False,
                     cbar_kws={'label': 'CCC'})
    ax.set_yticklabels(times, fontsize=6)
    if title:
        ax.set_title(title)
    plt.tight_layout()
    if show:
        plt.show()
    plt.close()
    return ax
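# Example usage of heatmap_plot -- a minimal sketch; 'dist_mat.npy',
# 'tribe.tgz' and 'raw_wavs/' are hypothetical paths, not files referenced
# elsewhere in this module.
def _example_heatmap_plot():
    from eqcorrscan.core.match_filter import Tribe
    big_tribe = Tribe().read('tribe.tgz')  # hypothetical tribe file
    return heatmap_plot(dmat_file='dist_mat.npy', big_tribe=big_tribe,
                        raw_wav_dir='raw_wavs', tick_int=20,
                        title='Inter-template correlation', show=True)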
def decluster_day_parties(party_dir, tribe_dir, trig_int, min_chan, metric,
                          start, end):
    """
    :param party_dir: Directory housing the Party files from match_filter
    :param tribe_dir: Directory housing the cluster-specific Tribe files
        used to partition each Party
    :param trig_int: Minimum separation between detections in seconds
    :param min_chan: Minimum number of channels used in a detection
    :param metric: 'avg_cor' or 'cor_sum'
    :param start: Start UTCDateTime for this instance
    :param end: End UTCDateTime for this instance
    :return:
    """
    from glob import glob
    from obspy import UTCDateTime
    from eqcorrscan.core.match_filter import Party, Tribe

    all_parties = glob('%s/*[0-9].tgz' % party_dir)
    party_files = [f for f in all_parties
                   if UTCDateTime(f.split('_')[-2]) > start - 1
                   and UTCDateTime(f.split('_')[-2]) < end + 1]
    all_files = glob('%s/*' % party_dir)
    party_files.sort()
    num = 0
    print('Reading tribes to memory')
    tribes = [(Tribe().read(tribe_file),
               tribe_file.split('_')[-1].split('.')[0])
              for tribe_file in glob('%s/*' % tribe_dir)]
    print('Starting declustering')
    for i, party_file in enumerate(party_files):
        strt = UTCDateTime()
        print('Processing party %s at %02d:%02d:%02d'
              % (party_file, strt.hour, strt.minute, strt.second))
        party = Party()
        party.read(party_file)
        for tribe in tribes:
            outfile = '%s_min%02d_%s_%s_declust' % (
                party_file.split('.')[0], min_chan, metric, tribe[1])
            if '%s.tgz' % outfile in all_files:
                print('Already wrote %s.tgz' % outfile)
                continue
            print('Working on tribe %s' % tribe[1])
            num += 1
            print('Original Party has length %d' % len(party))
            print('Partitioning into cluster: %s' % tribe[1])
            part_party = partition_party_by_tribe(party, tribe[0])
            print('Enforcing minimum no_chans')
            part_party.min_chans(min_chan)
            print('Declustering')
            try:
                part_party.decluster(trig_int=trig_int, metric=metric)
            except IndexError as msg:
                print('Declustering failed with %s\n' % msg)
                print('Probably no detections')
                continue
            print('Writing party to %s' % outfile)
            part_party.write(outfile)
    return
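# Example usage of decluster_day_parties -- a minimal sketch; the
# directories, dates and thresholds here are hypothetical.
def _example_decluster_day_parties():
    from obspy import UTCDateTime
    decluster_day_parties(party_dir='parties', tribe_dir='tribe_groups',
                          trig_int=2.0, min_chan=5, metric='avg_cor',
                          start=UTCDateTime(2015, 1, 1),
                          end=UTCDateTime(2015, 2, 1))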
def Tribe_2_Detector(tribe_dir, raw_wavs, outdir, lowcut, highcut, filt_order,
                     samp_rate, shift_len, reject, dimension, prepick, length,
                     multiplex=False):
    """
    Take a directory of cluster-defined Tribes and write them to Detectors

    :param tribe_dir: Directory of Tribe files to convert
    :param raw_wavs: Directory of raw waveform files, one per template
    :param outdir: Output directory for the Detectors
    :return:
    """
    tribe_files = glob('%s/*.tgz' % tribe_dir)
    tribe_files.sort()
    wav_files = glob('%s/*' % raw_wavs)
    for tfile in tribe_files:
        tribe = Tribe().read(tfile)
        print('Working on Tribe: %s' % tfile)
        templates = []
        for temp in tribe:
            try:
                wav = read([wav for wav in wav_files
                            if wav.split('/')[-1].split('.')[0]
                            == temp.name][0])
            except IndexError:
                print('Event not above SNR 1.5')
                continue
            wav.traces = [
                tr.trim(starttime=tr.stats.starttime + 2 - prepick,
                        endtime=tr.stats.starttime + 2 - prepick + length)
                for tr in wav if tr.stats.channel[-1] == 'Z']
            templates.append(wav)
        # Now construct the detector
        detector = Detector()
        detector.construct(streams=templates, lowcut=lowcut, highcut=highcut,
                           filt_order=filt_order, sampling_rate=samp_rate,
                           multiplex=multiplex,
                           name=tfile.split('/')[-1].split('.')[0],
                           align=True, shift_len=shift_len, reject=reject,
                           no_missed=False)
        detector.write('%s/%s_detector'
                       % (outdir, tfile.split('/')[-1].split('.')[0]))
    return
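# Example usage of Tribe_2_Detector -- a minimal sketch; the directories
# and processing values are hypothetical, chosen only to show the call
# signature.
def _example_tribe_2_detector():
    Tribe_2_Detector(tribe_dir='tribe_groups', raw_wavs='raw_wavs',
                     outdir='detectors', lowcut=2.0, highcut=10.0,
                     filt_order=4, samp_rate=50.0, shift_len=0.3,
                     reject=0.3, dimension=9, prepick=0.5, length=3.0)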
def make_synthetic_tribe(
    n_templates: int,
    n_channels: int,
    process_length: float,
    template_length: float,
    sampling_rate: float = 100.0,
) -> Tribe:
    """
    Generate a synthetic tribe of templates

    Parameters
    ----------
    n_templates
        Number of templates to generate
    n_channels
        Number of channels for each template
    process_length
        Process length in seconds
    template_length
        Template length in seconds
    sampling_rate
        Sampling rate in Hz

    Returns
    -------
    Tribe of synthetic templates
    """
    nslc = [(random_string(2).upper(), random_string(4).upper(),
             random_string(2).upper(), random_string(3).upper())
            for _ in range(n_channels)]
    tribe = Tribe()
    for i in range(n_templates):
        st = Stream()
        for j in range(n_channels):
            tr = Trace(
                data=np.random.randn(int(sampling_rate * template_length)),
                header=dict(network=nslc[j][0], station=nslc[j][1],
                            location=nslc[j][2], channel=nslc[j][3],
                            sampling_rate=sampling_rate))
            st += tr
        name = f"synth_template_{i}"
        tribe.templates.append(
            Template(name=name, st=st, lowcut=2.0, highcut=10.0,
                     samp_rate=sampling_rate, filt_order=4, prepick=0.1,
                     process_length=process_length))
    return tribe
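# Example usage of make_synthetic_tribe -- a quick self-check of the
# generated tribe's shape (parameter values are arbitrary).
def _example_make_synthetic_tribe():
    tribe = make_synthetic_tribe(n_templates=5, n_channels=3,
                                 process_length=3600.0,
                                 template_length=4.0, sampling_rate=100.0)
    assert len(tribe) == 5
    assert all(len(t.st) == 3 for t in tribe)
    return tribe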
def get_templates(self, **kwargs) -> Tribe:
    """
    Get template waveforms from the database

    Supports passing a `concurrent.futures.Executor` using the
    `executor` keyword argument for parallel reading.

    {get_event_params}
    """
    paths = self.bank_path + self.read_index(
        columns=["path", "latitude", "longitude"], **kwargs).path
    paths = [path.replace(self.ext, self.template_ext) for path in paths]
    future = self.executor.map(_lazy_template_read, paths)
    return Tribe([t for t in future if t is not None])
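# Example call of get_templates, assuming `bank` is an instance of the
# class this method belongs to and that the query keywords below are
# supported by its read_index (a minimal sketch; values are hypothetical).
def _example_get_templates(bank):
    return bank.get_templates(minlatitude=-43.5, maxlatitude=-42.5,
                              minlongitude=170.0, maxlongitude=172.0)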
def catalog_to_templates(catalog):
    tribe = Tribe()
    for hour in range(0, 24):
        pattern = os.path.join(
            WF_DIR_ROOT, "G*",
            "*..DP*%s-%s-%s_%02d*2020*" % (year, month, day, hour))
        if not glob(pattern):
            Logger.info("No data found for hour %d" % hour)
            continue
        st_hr = read(pattern)
        st_hr.resample(SAMPLING_RATE)
        st_hr.detrend()
        starttime = st_hr[0].stats.starttime
        endtime = st_hr[0].stats.endtime
        cat_hr = Catalog()
        for ev in catalog:
            if starttime < ev.picks[0].time < endtime:
                cat_hr += ev
        Logger.info("Processing %d detections during hour %d"
                    % (len(cat_hr), hour))
        # For each pick find the channel with highest amplitude
        cat_template = Catalog()
        for iev, ev in enumerate(cat_hr):
            nbad = 0
            for ip, p in enumerate(ev.picks):
                sta = p.waveform_id.station_code
                cut_st = st_hr.select(station=sta).slice(
                    starttime=p.time - 0.02, endtime=p.time + 0.1)
                imax = np.argmax(cut_st.max())
                ev.picks[ip].waveform_id.channel_code = \
                    cut_st[imax].stats.channel
                ratio = highf_ratio(data=cut_st[imax].data)
                if ratio > 0.75:
                    nbad += 1
            if nbad > 3:
                Logger.info(
                    "Removing event with ratio of high frequencies = %f"
                    % ratio)
            elif len(ev.picks) > 4:
                cat_template += ev
        # Make a tribe for this hour
        if len(cat_template) > 0:
            tribe_hour = construct_tribe(catalog=cat_template, stream=st_hr)
            tribe += tribe_hour
            Logger.info("Adding: %s" % tribe_hour)
    Logger.info("Initial tribe for this whole day: %s" % tribe)
    return tribe
def _download_and_make_template(
    event: Event,
    client,
    download_data_len: float,
    path_structure: str,
    bank_path: str,
    template_name_structure: str,
    save_raw: bool,
    **kwargs,
) -> Template:
    """ Make the template using downloaded data """
    Logger.debug("Making template for event {0}".format(event.resource_id))
    _process_len = kwargs.pop("process_len", download_data_len)
    if _process_len > download_data_len:
        Logger.info(
            "Downloading {0}s of data as required by process len".format(
                _process_len))
        download_data_len = _process_len
    st = _get_data_for_event(
        event=event, client=client, download_data_len=download_data_len,
        path_structure=path_structure, bank_path=bank_path,
        template_name_structure=template_name_structure, save_raw=save_raw)
    Logger.debug("Downloaded {0} traces for event {1}".format(
        len(st), event.resource_id))
    tribe = Tribe().construct(
        method="from_meta_file", meta_file=Catalog([event]), st=st,
        process_len=download_data_len, **kwargs)
    try:
        template = tribe[0]
        Logger.info("Made template: {0}".format(template))
    except IndexError as e:
        Logger.error(e)
        return None
    template.name = event.resource_id.id.split('/')[-1]
    # Edit comment to reflect new template_name
    for comment in template.event.comments:
        if comment.text.startswith("eqcorrscan_template_"):
            comment.text = "eqcorrscan_template_{0}".format(template.name)
    return template
    if glob(pattern_hawk):
        ndt = 1 / 250.0
        wf_len_s = int(8.0 / ndt) * ndt
        prepick = 4.0
        detst += read(pattern_hawk, starttime=tmin - prepick,
                      endtime=tmin - prepick + wf_len_s)
    detst.detrend("demean")
    return detst


if __name__ == "__main__":
    daystr = sys.argv[1]
    print("************ %s ************" % daystr)
    tribe = Tribe().read(os.path.join(party_dir, "tribe_day%s.tgz" % daystr))
    print(tribe)
    if not tribe.templates:
        print("no templates...")
        sys.exit()
    for template in tribe:
        if not template:
            continue
        fname = os.path.join(out_dir,
                             "%s_full_template.mseed" % template.name)
        if os.path.exists(fname):
            continue
        num_p_picks = len(
            [p for p in template.event.picks if p.phase_hint == "P"])
        has_magn = len(template.event.magnitudes) > 0
        if len(template.st) > 5 and (num_p_picks > 0 or has_magn):
def check_tribe_quality(tribe: Tribe, seed_ids: set = None,
                        min_stations: int = None, lowcut: float = None,
                        highcut: float = None, filt_order: int = None,
                        samp_rate: float = None, process_len: float = None,
                        *args, **kwargs) -> Tribe:
    """
    Check that templates in the tribe have channels all the same length.

    Parameters
    ----------
    tribe
        A Tribe to check the quality of.
    seed_ids
        Seed-ids of channels to be included in the templates - if None,
        then all channels will be included
    min_stations
        Minimum number of stations for a template to be included.
    lowcut
        Desired template processing lowcut in Hz, if None, will not check
    highcut
        Desired template processing highcut in Hz, if None, will not check
    filt_order
        Desired template filter order, if None, will not check
    samp_rate
        Desired template sampling rate in Hz, if None, will not check
    process_len
        Desired template processing length in s, if None, will not check

    Returns
    -------
    A filtered tribe.
    """
    processing_keys = dict(lowcut=lowcut, highcut=highcut,
                           filt_order=filt_order, samp_rate=samp_rate,
                           process_length=process_len)
    Logger.info("Checking processing parameters: {0}".format(
        processing_keys))
    min_stations = min_stations or 0
    _templates = []
    # Perform length check
    for template in tribe:
        counted_lengths = Counter([tr.stats.npts for tr in template.st])
        if len(counted_lengths) > 1:
            Logger.warning(
                "Multiple lengths found in template, using most common"
                " ({0})".format(counted_lengths.most_common(1)[0][0]))
            _template = template.copy()
            _template.st = Stream()
            for tr in template.st:
                if tr.stats.npts == counted_lengths.most_common(1)[0][0]:
                    _template.st += tr
            _templates.append(_template)
        else:
            _templates.append(template)
    templates = _templates
    # Check processing parameters
    _templates = []
    for template in templates:
        for processing_key, processing_value in processing_keys.items():
            if processing_value and template.__dict__[
                    processing_key] != processing_value:
                Logger.warning("Template {0}: {1} does not match {2}".format(
                    processing_key, template.__dict__[processing_key],
                    processing_value))
                break
        else:
            _templates.append(template)
    templates = _templates
    # Perform station check
    if seed_ids is None:
        seed_ids = {tr.id for template in tribe for tr in template.st}
    for template in templates:
        _st = Stream()
        for tr in template.st:
            if tr.id in seed_ids:
                _st += tr
        template.st = _st
    return Tribe([t for t in templates
                  if len({tr.stats.station for tr in t.st}) > min_stations])
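# Example usage of check_tribe_quality -- a minimal sketch; 'tribe.tgz' is
# a hypothetical file and the processing values are arbitrary.
def _example_check_tribe_quality():
    from eqcorrscan.core.match_filter import Tribe
    tribe = Tribe().read('tribe.tgz')
    checked = check_tribe_quality(tribe, min_stations=3, lowcut=2.0,
                                  highcut=10.0, filt_order=4,
                                  samp_rate=50.0, process_len=86400.0)
    Logger.info("Kept {0} of {1} templates".format(len(checked), len(tribe)))
    return checked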
def make_templates(
    self,
    catalog: Catalog,
    stream: Stream = None,
    client=None,
    download_data_len: float = 90,
    save_raw: bool = True,
    update_index: bool = False,
    **kwargs,
) -> Tribe:
    """
    Make templates from data or client based on a given catalog.

    Templates will be put in the database. Requires either a stream or
    a suitable client with a get_waveforms method.

    Parameters
    ----------
    catalog
        Catalog of events to generate templates from.
    stream
        Optional: Stream encompassing the events in the catalog
    client
        Optional: Client with at-least a `get_waveforms` method, ideally
        the client should make the data for the events in catalog
        available.
    download_data_len
        If client is given this is the length of data to download. The
        raw continuous data will also be saved to disk to allow later
        processing if save_raw=True
    save_raw
        Whether to store raw data on disk as well - defaults to True.
    update_index
        Flag to indicate whether or not to update the event index after
        writing the new events. Default is False.
    kwargs
        Keyword arguments supported by EQcorrscan's `Template.construct`
        method. Requires at least:
          - lowcut
          - highcut
          - samp_rate
          - filt_order
          - prepick
          - length
          - swin

    Returns
    -------
    Tribe of templates
    """
    assert client or stream, "Needs either client or stream"
    if stream is not None:
        tribe = Tribe().construct(method="from_meta_file",
                                  meta_file=catalog, st=stream, **kwargs)
    else:
        Logger.debug("Making templates")
        inner_download_and_make_template = partial(
            _download_and_make_template, client=client,
            download_data_len=download_data_len,
            path_structure=self.path_structure,
            bank_path=self.bank_path,
            template_name_structure=self.template_name_structure,
            save_raw=save_raw, **kwargs)
        template_iterable = self.executor.map(
            inner_download_and_make_template, catalog)
        tribe = Tribe([t for t in template_iterable if t is not None])
    self.put_templates(tribe, update_index=update_index)
    return tribe
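# Example call of make_templates, assuming `bank` is an instance of the
# class defining it -- a minimal sketch; the client choice and template
# processing values are hypothetical.
def _example_make_templates(bank, catalog):
    from obspy.clients.fdsn import Client
    return bank.make_templates(catalog=catalog, client=Client("GEONET"),
                               download_data_len=90.0, lowcut=2.0,
                               highcut=10.0, samp_rate=50.0, filt_order=4,
                               prepick=0.5, length=3.0, swin="all")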
def _download_and_make_template(
    event: Event,
    client,
    download_data_len: float,
    path_structure: str,
    bank_path: str,
    template_name_structure: str,
    save_raw: bool,
    rebuild: bool,
    **kwargs,
) -> Template:
    """ Make the template using downloaded data """
    Logger.debug("Making template for event {0}".format(event.resource_id))
    if not rebuild:
        try:
            path = _summarize_template(
                event=event, path_struct=path_structure,
                name_struct=template_name_structure)["path"]
        except ValueError as e:
            Logger.error(f"Could not summarize event due to {e}")
            return None
        ppath = (Path(bank_path) / path).absolute()
        ppath.parent.mkdir(parents=True, exist_ok=True)
        output_path = str(ppath)
        if os.path.isfile(output_path):
            Logger.debug("Template exists and rebuild=False, skipping")
            return read_template(output_path)
    # Sanitize event - sometimes Arrivals are not linked to Picks.
    pick_dict = {p.resource_id.id: p for p in event.picks}
    for origin in event.origins:
        origin.arrivals = [
            arr for arr in origin.arrivals if arr.pick_id in pick_dict]
    _process_len = kwargs.pop("process_len", download_data_len)
    if _process_len > download_data_len:
        Logger.info(
            "Downloading {0}s of data as required by process len".format(
                _process_len))
        download_data_len = _process_len
    st = _get_data_for_event(
        event=event, client=client, download_data_len=download_data_len,
        path_structure=path_structure, bank_path=bank_path,
        template_name_structure=template_name_structure, save_raw=save_raw)
    if st is None:
        return None
    Logger.debug("Downloaded {0} traces for event {1}".format(
        len(st), event.resource_id))
    try:
        tribe = Tribe().construct(
            method="from_meta_file", meta_file=Catalog([event]), st=st,
            process_len=download_data_len, **kwargs)
    except Exception as e:
        Logger.error(e)
        return None
    try:
        template = tribe[0]
        Logger.info("Made template: {0}".format(template))
    except IndexError as e:
        Logger.error(e)
        return None
    template.name = event.resource_id.id.split('/')[-1]
    # Edit comment to reflect new template_name
    for comment in template.event.comments:
        if comment.text and comment.text.startswith("eqcorrscan_template_"):
            comment.text = "eqcorrscan_template_{0}".format(template.name)
    return template
def construct_tribe(catalog, stream):
    # parameters for templates
    ndt = 1 / SAMPLING_RATE
    prepick = int(0.05 / ndt) * ndt  # ensures all templates have same length
    length = int(0.5 / ndt) * ndt  # ensures all templates have same length
    lowcut = None
    highcut = 60.0
    filt_order = 3
    process_len = stream[0].stats.endtime - stream[0].stats.starttime
    min_snr = 3
    num_cores = cpu_count()
    templates, catalog, process_lengths = custom_template_gen(
        method='from_meta_file', meta_file=catalog, process_len=process_len,
        st=stream, lowcut=lowcut, highcut=highcut, samp_rate=SAMPLING_RATE,
        filt_order=filt_order, length=length, prepick=prepick, swin='all',
        all_horiz=False, min_snr=min_snr, num_cores=num_cores,
        ignore_bad_data=True, plot=False, return_event=True)

    from eqcorrscan.core.match_filter import Template
    from obspy.core.event import Comment, CreationInfo

    template_list = []
    for template, event, process_len in zip(templates, catalog,
                                            process_lengths):
        if len(template) < 4:
            Logger.warning("Less than 4 traces for this template. Skipping."
                           " Number of picks was %d" % len(event.picks))
            continue
        t = Template()
        # Collect zero-amplitude traces first: removing traces from the
        # Stream while iterating over it directly would skip traces.
        rm_trs = [tr for tr in template
                  if not np.any(tr.data.astype(np.float16))]
        for tr in rm_trs:
            Logger.warning('Data are zero in float16, missing data,'
                           ' will not use: {0}'.format(tr.id))
            template.remove(tr)
        if len(template) == 0:
            Logger.error('Empty template. Skipping')
            continue
        # Check if traces have same length
        lengths = set([tr.stats.endtime - tr.stats.starttime
                       for tr in template])
        if len(lengths) > 1:
            print("Traces don't have same lengths. Fixing")
            wf_len = list(lengths)[0]
            for tr in template:
                tr.trim(starttime=tr.stats.starttime,
                        endtime=tr.stats.starttime + wf_len,
                        fill_value=0, pad=True)
        t.st = template
        t.name = template.sort(['starttime'])[0].\
            stats.starttime.strftime('%Y_%m_%dt%H_%M_%S')
        t.lowcut = lowcut
        t.highcut = highcut
        t.filt_order = filt_order
        t.samp_rate = SAMPLING_RATE
        t.process_length = process_len
        t.prepick = prepick
        event.comments.append(Comment(text="eqcorrscan_template_" + t.name))
        t.event = event
        template_list.append(t)
    tribe = Tribe(templates=template_list)
    return tribe
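# Example usage of construct_tribe -- a minimal sketch; the waveform and
# catalog file names are hypothetical.
def _example_construct_tribe():
    from obspy import read, read_events
    st = read('hour_of_data.ms')
    st.resample(SAMPLING_RATE)
    st.detrend()
    cat = read_events('hour_events.xml')
    return construct_tribe(catalog=cat, stream=st)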
def cluster_tribe(tribe, raw_wav_dir, lowcut, highcut, samp_rate, filt_order,
                  pre_pick, length, shift_len, corr_thresh, cores,
                  dist_mat=False, show=False):
    """
    Cross correlate all templates in a tribe and return separate tribes for
    each cluster

    :param tribe: Tribe to cluster
    :return:

    .. Note: Functionality here is pillaged from align_design as we don't
        want the multiplexed portion of that function.
    """
    tribe.sort()
    raw_wav_files = glob('%s/*' % raw_wav_dir)
    raw_wav_files.sort()
    all_wavs = [wav.split('/')[-1].split('.')[0] for wav in raw_wav_files]
    names = [t.name for t in tribe if t.name in all_wavs]
    wavs = [wav for wav in raw_wav_files
            if wav.split('/')[-1].split('.')[0] in names]
    new_tribe = Tribe()
    new_tribe.templates = [temp for temp in tribe if temp.name in names]
    print('Processing temps')
    temp_list = [(shortproc(read(tmp), lowcut=lowcut, highcut=highcut,
                            samp_rate=samp_rate, filt_order=filt_order,
                            parallel=True, num_cores=cores), template)
                 for tmp, template in zip(wavs, new_tribe)]
    print('Clipping traces')
    for temp in temp_list:
        print('Clipping template %s' % temp[1].name)
        for tr in temp[0]:
            pk = [pk for pk in temp[1].event.picks
                  if pk.waveform_id.station_code == tr.stats.station
                  and pk.waveform_id.channel_code == tr.stats.channel][0]
            tr.trim(starttime=pk.time - shift_len - pre_pick,
                    endtime=pk.time - pre_pick + length + shift_len)
    trace_lengths = [tr.stats.endtime - tr.stats.starttime
                     for st in temp_list for tr in st[0]]
    clip_len = min(trace_lengths) - (2 * shift_len)
    stachans = list(set([(tr.stats.station, tr.stats.channel)
                         for st in temp_list for tr in st[0]]))
    print('Aligning traces')
    for stachan in stachans:
        trace_list = []
        trace_ids = []
        for i, st in enumerate(temp_list):
            tr = st[0].select(station=stachan[0], channel=stachan[1])
            if len(tr) > 0:
                trace_list.append(tr[0])
                trace_ids.append(i)
            if len(tr) > 1:
                warnings.warn('Too many matches for %s %s'
                              % (stachan[0], stachan[1]))
        shift_len_samples = int(shift_len * trace_list[0].stats.sampling_rate)
        shifts, cccs = stacking.align_traces(trace_list=trace_list,
                                             shift_len=shift_len_samples,
                                             positive=True)
        for i, shift in enumerate(shifts):
            st = temp_list[trace_ids[i]][0]
            start_t = st.select(station=stachan[0],
                                channel=stachan[1])[0].stats.starttime
            start_t += shift_len
            start_t -= shift
            st.select(station=stachan[0],
                      channel=stachan[1])[0].trim(start_t,
                                                  start_t + clip_len)
    print('Clustering')
    if isinstance(dist_mat, np.ndarray):
        groups = cluster_from_dist_mat(dist_mat=dist_mat,
                                       temp_list=temp_list, show=show,
                                       corr_thresh=corr_thresh)
    else:
        groups = clustering.cluster(temp_list, show=show,
                                    corr_thresh=corr_thresh,
                                    allow_shift=False, save_corrmat=True,
                                    cores=cores)
    group_tribes = []
    for group in groups:
        group_tribes.append(
            Tribe(templates=[Template(st=tmp[0], name=tmp[1].name,
                                      event=tmp[1].event, highcut=highcut,
                                      lowcut=lowcut, samp_rate=samp_rate,
                                      filt_order=filt_order,
                                      prepick=pre_pick)
                             for tmp in group]))
    return group_tribes
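# Example usage of cluster_tribe -- a minimal sketch; the waveform
# directory and processing values are hypothetical. Each returned group is
# itself a Tribe, so it can be written straight to disk.
def _example_cluster_tribe(tribe):
    groups = cluster_tribe(tribe, raw_wav_dir='raw_wavs', lowcut=2.0,
                           highcut=10.0, samp_rate=50.0, filt_order=4,
                           pre_pick=0.5, length=3.0, shift_len=0.3,
                           corr_thresh=0.4, cores=4)
    for i, group in enumerate(groups):
        group.write('tribe_group_%03d.tgz' % i)
    return groups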
# tmin = origt - 2.0
# tmax = origt + 6.0
# stations = list(set([p.waveform_id.station_code for p in event.picks]))
# print(stations)
# stream = get_stream_hour(tmin, tmax, stations)
# stream.write(stream_fname, format="MSEED")
# if event.origins[0].depth < 50:
#     stream.write(os.path.join("templates_less50m", "%s.mseed" % name),
#                  format="MSEED")

for f in templist:
    if "tribe" in f:
        continue
    print(f)
    tribe = Tribe().read(f)
    template = tribe[0]
    event = template.event
    depth = event.origins[0].depth
    if depth < 10:
        origt = event.origins[0].time
        evlat = event.origins[0].latitude
        evlon = event.origins[0].longitude
        tmin = origt - 0.5
        tmax = origt + 5.0
        name = template.name
        fname = os.path.join("templates", "waveforms", "%s.mseed" % name)
        stream = read(fname)
        for tr in stream:
            station = tr.stats.station
def cluster_cat(catalog, corr_thresh, corr_params=None, raw_wav_dir=None,
                dist_mat=False, out_cat=None, show=False, method='average'):
    """
    Cross correlate all events in a catalog and return separate tribes and
    catalogs for each cluster

    :param catalog: Catalog of events to cluster
    :param corr_thresh: Correlation threshold for clustering
    :param corr_params: Dictionary of filter parameters. Must include keys:
        lowcut, highcut, samp_rate, filt_order, pre_pick, length, shift_len,
        cores. If None, waveforms will not be processed and the output
        tribes will be populated with empty Streams.
    :param raw_wav_dir: Directory of waveforms to take from
    :param dist_mat: If there's a precomputed distance matrix, use this
        instead of doing all the correlations
    :param out_cat: Output catalog corresponding to the events
    :param show: Show the dendrogram? Careful as this can exceed max
        recursion
    :param method: Linkage method used when clustering from a precomputed
        distance matrix
    :return:

    .. Note: Functionality here is pillaged from align_design as we don't
        want the multiplexed portion of that function.
    """
    if corr_params and raw_wav_dir:
        shift_len = corr_params['shift_len']
        lowcut = corr_params['lowcut']
        highcut = corr_params['highcut']
        samp_rate = corr_params['samp_rate']
        filt_order = corr_params['filt_order']
        pre_pick = corr_params['pre_pick']
        length = corr_params['length']
        cores = corr_params['cores']
        raw_wav_files = glob('%s/*' % raw_wav_dir)
        raw_wav_files.sort()
        all_wavs = [wav.split('/')[-1].split('_')[-3]
                    for wav in raw_wav_files]
        print(all_wavs[0])
        names = [ev.resource_id.id.split('/')[-1] for ev in catalog
                 if ev.resource_id.id.split('/')[-1] in all_wavs]
        print(names[0])
        wavs = [wav for wav in raw_wav_files
                if wav.split('/')[-1].split('_')[-3] in names]
        print(wavs[0])
        new_cat = Catalog(events=[
            ev for ev in catalog
            if ev.resource_id.id.split('/')[-1] in names])
        print('Processing temps')
        temp_list = [(shortproc(read('{}/*'.format(tmp)), lowcut=lowcut,
                                highcut=highcut, samp_rate=samp_rate,
                                filt_order=filt_order, parallel=True,
                                num_cores=cores),
                      ev.resource_id.id.split('/')[-1])
                     for tmp, ev in zip(wavs, new_cat)]
        print('Clipping traces')
        rm_temps = []
        rm_ev = []  # initialized outside the loop so removals accumulate
        for i, temp in enumerate(temp_list):
            print('Clipping template %s' % new_cat[i].resource_id.id)
            rm_ts = []  # Make a list of traces with no pick to remove
            for tr in temp[0]:
                pk = [pk for pk in new_cat[i].picks
                      if pk.waveform_id.station_code == tr.stats.station
                      and pk.waveform_id.channel_code == tr.stats.channel]
                if len(pk) == 0:
                    rm_ts.append(tr)
                else:
                    tr.trim(starttime=pk[0].time - shift_len - pre_pick,
                            endtime=pk[0].time - pre_pick + length
                            + shift_len)
            # Remove pickless traces
            for rm in rm_ts:
                temp[0].traces.remove(rm)
            # If trace lengths are internally inconsistent, remove template
            if len(list(set([len(tr) for tr in temp[0]]))) > 1:
                rm_temps.append(temp)
            # If template is now length 0, remove it and associated event
            if len(temp[0]) == 0:
                rm_temps.append(temp)
                rm_ev.append(new_cat[i])
        for t in rm_temps:
            temp_list.remove(t)
        # Remove the corresponding events as well so catalog and distmat
        # are the same shape
        for rme in rm_ev:
            new_cat.events.remove(rme)
        print(new_cat)
        new_cat.write(out_cat, format="QUAKEML")
    print('Clustering')
    if isinstance(dist_mat, np.ndarray):
        print('Assuming the catalog provided is the same shape as dist_mat')
        # Dummy streams
        temp_list = [(Stream(), ev) for ev in catalog]
        groups = cluster_from_dist_mat(dist_mat=dist_mat,
                                       temp_list=temp_list, show=show,
                                       corr_thresh=corr_thresh,
                                       method=method)
    else:
        groups = clustering.cluster(temp_list, show=show,
                                    corr_thresh=corr_thresh,
                                    shift_len=shift_len * 2,
                                    save_corrmat=True, cores=cores)
    group_tribes = []
    group_cats = []
    if corr_params:
        for group in groups:
            group_tribes.append(
                Tribe(templates=[
                    Template(st=tmp[0],
                             name=tmp[1].resource_id.id.split('/')[-1],
                             event=tmp[1], highcut=highcut, lowcut=lowcut,
                             samp_rate=samp_rate, filt_order=filt_order,
                             prepick=pre_pick)
                    for tmp in group]))
            group_cats.append(Catalog(events=[tmp[1] for tmp in group]))
    else:
        for group in groups:
            group_tribes.append(
                Tribe(templates=[
                    Template(st=tmp[0],
                             name=tmp[1].resource_id.id.split('/')[-1],
                             event=tmp[1].event, highcut=None, lowcut=None,
                             samp_rate=None, filt_order=None, prepick=None)
                    for tmp in group]))
            group_cats.append(Catalog(events=[tmp[1] for tmp in group]))
    return group_tribes, group_cats
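# Example usage of cluster_cat -- a minimal sketch; the corr_params values
# and paths are hypothetical. The keys shown are the ones the docstring
# requires.
def _example_cluster_cat(catalog):
    corr_params = dict(lowcut=2.0, highcut=10.0, samp_rate=50.0,
                       filt_order=4, pre_pick=0.5, length=3.0,
                       shift_len=0.3, cores=4)
    return cluster_cat(catalog, corr_thresh=0.4, corr_params=corr_params,
                       raw_wav_dir='raw_wavs', out_cat='clustered_cat.xml',
                       show=False)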
    split_dates = partition(all_dates, splits)
    # Determine date range
    try:
        inst_dats = split_dates[instance]
    except IndexError:
        print('Instance no longer needed. Downsize --splits for this job')
        sys.exit()
    inst_start = min(inst_dats)
    inst_end = max(inst_dats)
    print('This instance will run from %s to %s'
          % (inst_start.strftime('%Y/%m/%d'),
             inst_end.strftime('%Y/%m/%d')))
else:
    inst_dats = all_dates
tribe_rd_strt = timer()
# Reading tribe
tribe = Tribe().read(
    '/projects/nesi00228/data/templates/12-15/Tribe_12-15_P_nodups.tgz')
print('Reading Tribe tarball took %s seconds' % (timer() - tribe_rd_strt))
# Extract the station info from the templates
stachans = {tr.stats.station: [] for temp in tribe for tr in temp.st}
for temp in tribe:
    for tr in temp.st:
        # Don't hard code vertical channels!!
        chan_code = 'EH' + tr.stats.channel[-1]
        if chan_code not in stachans[tr.stats.station]:
            stachans[tr.stats.station].append(chan_code)
# Specify locations of waveform files
wav_dirs = ['/projects/nesi00228/data/miniseed/']
inst_partay = Party()
for day in inst_dats:
    dto = UTCDateTime(day)
    wav_read_start = timer()
def test_real_time_plotting(self):
    """Test the real-time plotter - must be run interactively."""
    seed_list = [
        "NZ.INZ.10.HHZ", "NZ.JCZ.10.HHZ", "NZ.FOZ.11.HHZ", "NZ.MSZ.10.HHZ",
        "NZ.PYZ.10.HHZ", "NZ.DCZ.10.HHZ", "NZ.WVZ.10.HHZ"]
    client = Client("GEONET")
    inv = client.get_stations(network=seed_list[0].split(".")[0],
                              station=seed_list[0].split(".")[1],
                              location=seed_list[0].split(".")[2],
                              channel=seed_list[0].split(".")[3])
    for seed_id in seed_list[1:]:
        net, sta, loc, chan = seed_id.split('.')
        inv += client.get_stations(network=net, station=sta, channel=chan,
                                   location=loc)
    now = UTCDateTime.now()
    template_cat = client.get_events(starttime=now - 3600, endtime=now)
    tribe = Tribe(templates=[
        Template(event=event, name=event.resource_id.id.split("/")[-1])
        for event in template_cat])
    template_names = cycle([t.name for t in tribe])
    buffer_capacity = 1200
    rt_client = RealTimeClient(server_url="link.geonet.org.nz",
                               buffer_capacity=buffer_capacity)
    for seed_id in seed_list:
        net, station, _, selector = seed_id.split(".")
        rt_client.select_stream(net=net, station=station,
                                selector=selector)
    rt_client.background_run()
    while len(rt_client.buffer) < 7:
        # Wait until we have some data
        time.sleep(SLEEP_STEP)
    detections = []
    plotter = EQcorrscanPlot(rt_client=rt_client, plot_length=60,
                             tribe=tribe, inventory=inv,
                             update_interval=1000, detections=detections)
    plotter.background_run()
    duration = 0
    step = 5
    while duration < MAX_DURATION:
        detections.append(
            Detection(template_name=next(template_names),
                      detect_time=UTCDateTime.now(), no_chans=999,
                      detect_val=999, threshold=999, threshold_type="MAD",
                      threshold_input=999, typeofdet="unreal",
                      event=Event(picks=[
                          Pick(time=UTCDateTime.now(),
                               waveform_id=WaveformStreamID(
                                   seed_string=seed_id))
                          for seed_id in seed_list])))
        time.sleep(step)
        duration += step
    rt_client.background_stop()
def cluster_map_plot(dmat_file, big_tribe, tribe_groups_dir, raw_wav_dir,
                     savefig=None):
    """
    Wrapper on seaborn.clustermap to allow for coloring of rows/columns
    by multiplet

    :param dmat_file:
    :param big_tribe:
    :param tribe_groups_dir:
    :return:
    """
    # Make list of temp files which were actually used in the clustering.
    # There were actually fewer of these than templates for some reason...?
    # XXX TODO May be worth using SAC directories instead?
    big_tribe.sort()
    raw_wav_files = glob('%s/*' % raw_wav_dir)
    raw_wav_files.sort()
    all_wavs = [wav.split('/')[-1].split('.')[0] for wav in raw_wav_files]
    names = [t.name for t in big_tribe if t.name in all_wavs]
    wavs = [wav for wav in raw_wav_files
            if wav.split('/')[-1].split('.')[0] in names]
    new_tribe = Tribe()
    new_tribe.templates = [temp for temp in big_tribe if temp.name in names]
    print('Processing temps')
    temp_list = [template.name for tmp, template in zip(wavs, new_tribe)]
    matrix = np.load(dmat_file)  # Take absolute value? NO
    dist_vec = squareform(matrix)
    Z = linkage(dist_vec)
    df_mat = pd.DataFrame(matrix)
    tribes = glob('{}/*.tgz'.format(tribe_groups_dir))
    grp_inds = []
    grp_nos = []
    for tribe in tribes:
        grp_nos.append(tribe.split('_')[-2])
        trb = Tribe().read(tribe)
        names = [temp.name for temp in trb]
        inds = []
        for i, nm in enumerate(temp_list):
            if nm in names:
                inds.append(i)
        grp_inds.append(tuple(inds))
    # Create a categorical palette to identify the multiplets
    multiplet_pal = sns.hls_palette(len(grp_inds))
    multiplet_lut = dict(zip(tuple(grp_inds), multiplet_pal))
    # Convert the palette to vectors that will be drawn on the side of
    # the matrix
    temp_colors = {}
    temp_inds = np.arange(0, len(temp_list), 1)
    for i in temp_inds:
        for key in multiplet_lut.keys():
            if i in key:
                temp_colors[i] = multiplet_lut[key]
                break
    template_colors = pd.Series(temp_inds, index=temp_inds,
                                name='Multiplet').map(temp_colors)
    cmg = sns.clustermap(df_mat, method='single', cmap='vlag_r', vmin=0.4,
                         vmax=1.4,
                         # row_colors=template_colors,
                         col_colors=template_colors, row_linkage=Z,
                         col_linkage=Z, yticklabels=False,
                         xticklabels=False,
                         cbar_kws={'label': '1 - CCC'}, figsize=(12, 12))
    if not savefig:
        plt.show()
    else:
        cmg.savefig(savefig, dpi=500)
    return cmg
for s in stat:
    for c in chans:
        st += makeTemplates(path, s, c, tempLims, freq)
        templateObj = Template(
            name=t.stats.starttime.isoformat()[-6:] + s.lower() + c.lower(),
            st=st, lowcut=freq[0], highcut=freq[1],
            samp_rate=t.stats.sampling_rate, filt_order=4, prepick=0)
        templateList.append(st)
        templates.append(templateObj)
        template_names.append(
            t.stats.starttime.isoformat()[-6:] + s.lower() + c.lower())
templates = Tribe(templates=templates)

# extract year string
tempYear = tempLims[0].year
tempYearStr = str(tempYear)

# extract month string and pad with zeros if necessary
tempMonth = tempLims[0].month
if tempMonth < 10:
    tempMonthStr = "0" + str(tempMonth)
else:
    tempMonthStr = str(tempMonth)

# extract day string and pad with zeros if necessary
tempDay = tempLims[0].day
if tempDay < 10:
    # Determine date range
    try:
        inst_dats = split_dates[instance]
    except IndexError:
        print('Instance no longer needed. Downsize --splits for this job')
        sys.exit()
    inst_start = min(inst_dats)
    inst_end = max(inst_dats)
    print('This instance will run from %s to %s'
          % (inst_start.strftime('%Y/%m/%d'),
             inst_end.strftime('%Y/%m/%d')))
else:
    inst_dats = all_dates
# Reading tribe
tribe = Tribe().read(tribe_file)
party = Party()
net_sta_loc_chans = list(set([(pk.waveform_id.network_code,
                               pk.waveform_id.station_code,
                               pk.waveform_id.location_code,
                               pk.waveform_id.channel_code)
                              for temp in tribe
                              for pk in temp.event.picks]))
for date in date_generator(inst_dats[0], inst_dats[-1]):
    dto = UTCDateTime(date)
    jday = dto.julday
    print('Running {}\nJday: {}'.format(dto, jday))
    wav_files = ['{}/{}/{}/{}/{}/{}.{}.{}.{}.{}.{:03d}.ms'.format(
        wav_dir, date.year, nslc[0], nslc[1], nslc[3], nslc[0], nslc[1],
        nslc[2], nslc[3], date.year, jday)
        for nslc in net_sta_loc_chans]
    pattern = os.path.join(
        WF_DIR_ROOT, "G*",
        "*..DP*%s-%s-%s_%02d*2020*" % (year, month, day, hour))
    if not glob(pattern):
        Logger.info("No data found for hour %d" % hour)
        continue
    st = read(pattern)
    st.resample(SAMPLING_RATE)
    st.detrend()
    party = tribe.detect(stream=st, threshold=threshold,
                         threshold_type=threshold_type, trig_int=trig_int,
                         plot=False, daylong=False, ignore_bad_data=True,
                         parallel_process=True, cores=cpu_count(),
                         concurrency="multiprocess", group_size=20,
                         overlap="calculate")
    party.min_chans(min_chans)
    # party.decluster(trig_int=trig_int)
    party = party.filter(dates=[st[0].stats.starttime,
                                st[0].stats.endtime],
                         min_dets=2)
    parties += party

# Save party
template_list = [f.template for f in parties if f]
if len(template_list) > 0:
    party_fname = os.path.join(OUTPUT_DIR, "party_day%s.tgz" % daystr)
    Logger.info("Saving final party to: %s" % party_fname)
    parties.write(filename=party_fname)
    # Save tribe
    final_tribe = Tribe(templates=template_list)
    tribe_fname = os.path.join(OUTPUT_DIR, "tribe_day%s.tgz" % daystr)
    Logger.info("Saving final tribe to: %s" % tribe_fname)
    final_tribe.write(filename=tribe_fname)