def process(self, window, *args, **kwargs):
    station = window['station']
    starttime = window['starttime']
    endtime = window['endtime']
    client = Client(sds_root=self.sds_root)
    stream = client.get_waveforms(network="*", station=station,
                                  location="*", channel="*",
                                  starttime=starttime, endtime=endtime)
    stream.sort(keys=['channel'], reverse=True)
    seismometer_list = {}
    for trace in stream:
        current_type = trace.stats.channel[0:2]
        if not seismometer_list.get(current_type):
            seismometer_list[current_type] = Stream(trace)
        else:
            seismometer_list[current_type].append(trace)
    for key, value in seismometer_list.items():
        yield value
def read_sds(pick, sds_root, phase="P", component="Z", trace_length=30,
             sample_rate=100):
    client = Client(sds_root=sds_root)
    if not pick.phase_hint == phase:
        return
    if not pick.waveform_id.channel_code[-1] == component:
        return
    t = pick.time
    t = t - trace_length + np.random.random_sample() * trace_length
    net = "*"
    sta = pick.waveform_id.station_code
    loc = "*"
    chan = "??" + component
    st = client.get_waveforms(net, sta, loc, chan, t, t + trace_length + 1)
    if st.traces:
        trace = st.traces[0]
        trace = signal_preprocessing(trace)
        points = trace_length * sample_rate + 1
        trim_trace(trace, points)
        return trace
def write_station_dataset(dataset_output_dir, sds_root, nslc, start_time,
                          end_time, trace_length=30, sample_rate=100,
                          remove_dir=False):
    if remove_dir:
        shutil.rmtree(dataset_output_dir, ignore_errors=True)
    os.makedirs(dataset_output_dir, exist_ok=True)
    client = Client(sds_root=sds_root)
    net, sta, loc, chan = nslc
    t = start_time
    counter = 0
    while t < end_time:
        stream = client.get_waveforms(net, sta, loc, chan, t,
                                      t + trace_length + 1)
        stream = signal_preprocessing(stream)
        points = trace_length * sample_rate + 1
        for trace in stream:
            try:
                trim_trace(trace, points)
            except IndexError as err:
                print(err)
                stream.remove(trace)
                continue
            finally:
                trace.picks = []
                time_stamp = trace.stats.starttime.isoformat()
                trace.write(dataset_output_dir + '/' + time_stamp
                            + trace.get_id() + ".pkl", format="PICKLE")
                counter += 1
        t += trace_length
def read_sds(window):
    config = get_config()
    station = window['station']
    starttime = window['starttime']
    endtime = window['endtime'] + 0.1
    client = Client(sds_root=config['SDS_ROOT'])
    stream = client.get_waveforms(network="*", station=station,
                                  location="*", channel="*",
                                  starttime=starttime, endtime=endtime)
    stream.sort(keys=['channel'], reverse=True)
    stream_list = {}
    for trace in stream:
        geophone_type = trace.stats.channel[0:2]
        if not stream_list.get(geophone_type):
            stream_list[geophone_type] = Stream(trace)
        else:
            stream_list[geophone_type].append(trace)
    return stream_list
def test_get_waveforms_bulk(self):
    """
    Test get_waveforms_bulk method.
    """
    year = 2015
    doy = 247
    t = UTCDateTime("%d-%03dT00:00:00" % (year, doy))
    with TemporarySDSDirectory(year=year, doy=doy) as temp_sds:
        chunks = [["AB", "XYZ", "", "HHZ", t, t + 20],
                  ["AB", "XYZ", "", "HHN", t + 20, t + 40],
                  ["AB", "XYZ", "", "HHE", t + 40, t + 60],
                  ["CD", "ZZZ3", "00", "BHZ", t + 60, t + 80],
                  ["CD", "ZZZ3", "00", "BHN", t + 80, t + 100],
                  ["CD", "ZZZ3", "00", "BHE", t + 120, t + 140]]
        client = Client(temp_sds.tempdir)
        st = client.get_waveforms_bulk(chunks)
        for _i in range(6):
            if _i <= 2:
                self.assertEqual(st[_i].stats.network, "AB")
                self.assertEqual(st[_i].stats.station, "XYZ")
                self.assertEqual(st[_i].stats.location, "")
            elif _i >= 2:
                self.assertEqual(st[_i].stats.network, "CD")
                self.assertEqual(st[_i].stats.station, "ZZZ3")
                self.assertEqual(st[_i].stats.location, "00")
        self.assertEqual(st[0].stats.channel, "HHZ")
        self.assertEqual(st[1].stats.channel, "HHN")
        self.assertEqual(st[2].stats.channel, "HHE")
        self.assertEqual(st[3].stats.channel, "BHZ")
        self.assertEqual(st[4].stats.channel, "BHN")
        self.assertEqual(st[5].stats.channel, "BHE")
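# Added usage sketch (not part of the test above): the same bulk request can
# be issued directly against an on-disk SDS archive. The archive path and the
# request time below are placeholders, not values from the test.
from obspy import UTCDateTime
from obspy.clients.filesystem.sds import Client

client = Client("/path/to/SDS")
t = UTCDateTime("2015-09-04T00:00:00")
bulk = [["AB", "XYZ", "", "HHZ", t, t + 20],
        ["CD", "ZZZ3", "00", "BHZ", t + 60, t + 80]]
st = client.get_waveforms_bulk(bulk)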
def read_sds(event, sds_root, phase="P", component="Z", trace_length=30,
             sample_rate=100, random_time=0):
    stream = Stream()
    client = Client(sds_root=sds_root)
    for pick in event.picks:
        if not pick.phase_hint == phase:
            print("Skip " + pick.phase_hint + " phase pick")
            continue
        if not pick.waveform_id.channel_code[-1] == component:
            print(pick.waveform_id.channel_code)
            continue
        t = event.origins[0].time
        if pick.time > t + trace_length:
            t = pick.time - trace_length + 5
            print("origin: " + t.isoformat() + " pick: " + pick.time.isoformat())
        if random_time:
            t = t - random_time + np.random.random_sample() * random_time * 2
        net = "*"
        sta = pick.waveform_id.station_code
        loc = "*"
        chan = "??" + component
        st = client.get_waveforms(net, sta, loc, chan, t, t + trace_length + 1)
        if st.traces:
            trace = st.traces[0]
            trace = signal_preprocessing(trace)
            points = trace_length * sample_rate + 1
            try:
                trim_trace(trace, points)
            except Exception as err:
                print(err)
                continue
            stream += st.traces[0]
        else:
            print("No trace in ", t.isoformat(), net, sta, loc, chan)
    return stream
def test_get_all_stations_and_nslc(self):
    """
    Test `get_all_stations` and `get_all_nslc` methods
    """
    # generate dummy SDS
    t = UTCDateTime()
    with TemporarySDSDirectory(year=None, doy=None, time=t) as temp_sds:
        client = Client(temp_sds.tempdir)
        expected_netsta = sorted([
            (net, sta)
            for net in temp_sds.networks
            for sta in temp_sds.stations])
        got_netsta = client.get_all_stations()
        self.assertEqual(expected_netsta, got_netsta)
        expected_nslc = sorted([
            (net, sta, loc, cha)
            for net in temp_sds.networks
            for sta in temp_sds.stations
            for loc in temp_sds.locations
            for cha in temp_sds.channels])
        got_nslc = client.get_all_nslc()
        self.assertEqual(expected_nslc, got_nslc)
        got_nslc = client.get_all_nslc(datetime=t)
        self.assertEqual(expected_nslc, got_nslc)
        # other dates that have no data should return empty list
        got_nslc = client.get_all_nslc(datetime=t + 2 * 24 * 3600)
        self.assertEqual([], got_nslc)
        got_nslc = client.get_all_nslc(datetime=t - 2 * 24 * 3600)
        self.assertEqual([], got_nslc)
def test_read_from_SDS_with_wildcarded_seed_ids(self):
    """
    Test reading data with wildcarded SEED IDs.
    """
    year, doy = 2015, 1
    t = UTCDateTime("%d-%03dT00:00:00" % (year, doy))
    with TemporarySDSDirectory(year=year, doy=doy) as temp_sds:
        # test different wildcard combinations in SEED ID
        client = Client(temp_sds.tempdir)
        for wildcarded_seed_id, num_matching_ids in zip(
                ("AB.ZZZ3..HH?", "AB.ZZZ3..HH*", "*.*..HHZ", "*.*.*.HHZ",
                 "*.*.*.*"),
                (3, 3, 4, 8, 48)):
            net, sta, loc, cha = wildcarded_seed_id.split(".")
            st = client.get_waveforms(net, sta, loc, cha, t - 200, t + 200)
            self.assertEqual(len(st), num_matching_ids)
        # test with SDS type wildcards
        for type_wildcard in ("*", "?"):
            net, sta, loc, cha = wildcarded_seed_id.split(".")
            st = client.get_waveforms(net, sta, loc, cha, t - 200, t + 200)
            self.assertEqual(len(st), num_matching_ids)
def test_read_from_sds_with_wildcarded_seed_ids(self):
    """
    Test reading data with wildcarded SEED IDs.
    """
    year, doy = 2015, 1
    t = UTCDateTime("%d-%03dT00:00:00" % (year, doy))
    with TemporarySDSDirectory(year=year, doy=doy) as temp_sds:
        # test different wildcard combinations in SEED ID
        client = Client(temp_sds.tempdir)
        for wildcarded_seed_id, num_matching_ids in zip(
                ("AB.ZZZ3..HH?", "AB.ZZZ3..HH*", "*.*..HHZ", "*.*.*.HHZ",
                 "*.*.*.*"),
                (3, 3, 4, 8, 48)):
            net, sta, loc, cha = wildcarded_seed_id.split(".")
            st = client.get_waveforms(net, sta, loc, cha, t - 200, t + 200)
            self.assertEqual(len(st), num_matching_ids)
        # test with SDS type wildcards
        for type_wildcard in ("*", "?"):
            net, sta, loc, cha = wildcarded_seed_id.split(".")
            st = client.get_waveforms(net, sta, loc, cha, t - 200, t + 200)
            self.assertEqual(len(st), num_matching_ids)
def main(argv=None):
    MatplotlibBackend.switch_backend("AGG", sloppy=False)
    parser = ArgumentParser(
        prog='obspy-sds-report', description=__doc__,
        formatter_class=RawDescriptionHelpFormatter)
    parser.add_argument(
        '-r', '--sds-root', dest='sds_root', required=True,
        help='Root folder of SDS archive.')
    parser.add_argument(
        '-o', '--output', dest='output', required=True,
        help='Full path (absolute or relative) of output files, without '
             'suffix (e.g. ``/tmp/sds_report``).')
    parser.add_argument(
        '-u', '--update', dest='update', default=False, action="store_true",
        help='Only update latency information, reuse previously computed list '
             'of streams to check and data percentage and gap count. Many '
             'other options e.g. regarding stream selection (``--id``, '
             '``--location``, ..) and time span of data quality checks '
             '(``--check-quality-days``) will be without effect if this '
             'option is specified. Only updating latency is significantly '
             'faster than a full analysis run, a normal use case is to do a '
             'full run once or twice per day and update latency every 5 or '
             'ten minutes. An exception is raised if an update is specified '
             'but the necessary file is not yet present.')
    parser.add_argument(
        '-l', '--location', dest='locations', action="append",
        help='Location codes to look for (e.g. ``""`` for empty location code '
             'or ``"00"``). This option can be provided multiple times and '
             'must be specified at least once for a full run (i.e. without '
             '``--update`` option). While network/station combinations are '
             'automatically discovered, only streams whose location codes are '
             'provided here will be discovered and taken into account and '
             'ultimately displayed.')
    parser.add_argument(
        '-c', '--channel', dest='channels', action="append",
        help='Channel codes to look for (e.g. specified three times with '
             '``HHZ``, ``EHZ`` and ``ELZ`` to cover all stations that serve '
             'a broad-band, short-period or low gain vertical channel). '
             'This option can be provided multiple times and must be '
             'specified at least once for a full run (i.e. without '
             '``--update`` option). Only one stream per '
             'network/station/location combination will be displayed, '
             'selected by the lowest latency.')
    parser.add_argument(
        '-i', '--id', dest='ids', action="append", default=[],
        help='SEED IDs of streams that should be included in addition to the '
             'autodiscovery of streams controlled by ``--location`` and '
             '``--channel`` options (e.g. ``IU.ANMO..LHZ``). '
             'This option can be provided multiple times.')
    parser.add_argument(
        '--skip', dest='skip', action="append", default=[],
        help='Networks or stations that should be skipped (e.g. ``IU`` or '
             '``IU.ANMO``). This option can be provided multiple times.')
    parser.add_argument(
        '-f', '--format', default="MSEED", choices=ENTRY_POINTS['waveform'],
        help='Waveform format of SDS archive. Should be "MSEED" in most '
             'cases. Use ``None`` or empty string for format autodetection '
             '(slower and should not be necessary in most all cases). '
             'Warning: formats that do not support ``headonly`` '
             'option in ``read()`` operation will be significantly slower.')
    parser.add_argument(
        '--check-backwards-days', dest='check_back_days', default=30,
        type=int, help='Check for latency backwards for this many days.')
    parser.add_argument(
        '--check-quality-days', dest='check_quality_days', default=7,
        type=int, help='Calculate and plot data availability and number of '
                       'gaps for a period of this many days.')
    parser.add_argument(
        '--latency-warn', dest='latency_warn', default=3600, type=float,
        help='Latency warning threshold in seconds.')
    parser.add_argument(
        '--latency-warn-color', dest='latency_warn_color', default="#FFFF33",
        help='Latency warning threshold color (valid HTML color string).')
    parser.add_argument(
        '--latency-error', dest='latency_error', default=24 * 3600,
        type=float, help='Latency error threshold in seconds.')
    parser.add_argument(
        '--latency-error-color', dest='latency_error_color',
        default="#E41A1C",
        help='Latency error threshold color (valid HTML color string).')
    parser.add_argument(
        '--percentage-warn', dest='percentage_warn', default=99.5, type=float,
        help='Data availability percentage warning threshold (``0`` to '
             '``100``).')
    parser.add_argument(
        '--gaps-warn', dest='gaps_warn', default=20, type=int,
        help='Gap/overlap number warning threshold.')
    parser.add_argument(
        '--data-quality-warn-color', dest='data_quality_warn_color',
        default="#377EB8",
        help='Data quality (percentage/gap count) warning color '
             '(valid HTML color string).')
    parser.add_argument(
        '--outdated-color', dest='outdated_color', default="#808080",
        help='Color for streams that have no data in check range '
             '(valid HTML color string).')
    parser.add_argument(
        '--ok-color', dest='ok_color', default="#4DAF4A",
        help='Color for streams that pass all checks (valid HTML color '
             'string).')
    parser.add_argument(
        '--background-color', dest='background_color', default="#999999",
        help='Color for background of page (valid HTML color string).')
    parser.add_argument(
        '-V', '--version', action='version',
        version='%(prog)s ' + __version__)
    args = parser.parse_args(argv)
    now = UTCDateTime()
    stop_time = now - args.check_back_days * 24 * 3600
    client = Client(args.sds_root)
    dtype_streamfile = np.dtype("U10, U30, U10, U10, f8, f8, i8")
    availability_check_endtime = now - 3600
    availability_check_starttime = (
        availability_check_endtime - (args.check_quality_days * 24 * 3600))
    streams_file = args.output + ".txt"
    html_file = args.output + ".html"
    scan_file = args.output + ".png"
    if args.format.upper() == "NONE" or args.format == "":
        args.format = None
    # check whether to set up list of streams to check or use existing list
    # update list of streams once per day at nighttime
    if args.update:
        if not os.path.isfile(streams_file):
            msg = ("Update flag specified, but no output of previous full run "
                   "was present in the expected location (as determined by "
                   "``--output`` flag: {})").format(streams_file)
            raise IOError(msg)
        # use existing list of streams and availability information, just
        # update latency
        nslc = np.loadtxt(streams_file, delimiter=",", dtype=dtype_streamfile)
    else:
        if not args.locations or not args.channels:
            msg = ("At least one location code ``--location`` and at least "
                   "one channel code ``--channel`` must be specified.")
            raise ObsPyException(msg)
        nsl = set()
        # get all network/station combinations in SDS archive
        for net, sta in client.get_all_stations():
            if net in args.skip or ".".join((net, sta)) in args.skip:
                continue
            # for all combinations of user specified location and channel
            # codes check if data is in SDS archive
            for loc in args.locations:
                for cha in args.channels:
                    if client.has_data(net, sta, loc, cha):
                        # for now omit channel information, we only include
                        # the channel with lowest latency later on
                        nsl.add((net, sta, loc))
                        break
        nsl = sorted(nsl)
        nslc = []
        # determine which channel to check for each network/station/location
        # combination
        for net, sta, loc in nsl:
            latency = []
            # check latency of all channels that should be checked
            for cha in args.channels:
                latency_ = client.get_latency(net, sta, loc, cha,
                                              stop_time=stop_time)
                latency.append(latency_ or np.inf)
            # only include the channel with lowest latency in our stream list
            cha = args.channels[np.argmin(latency)]
            latency = np.min(latency)
            nslc.append((net, sta, loc, cha, latency))
        for id in args.ids:
            net, sta, loc, cha = id.split(".")
            latency = client.get_latency(net, sta, loc, cha,
                                         stop_time=stop_time)
            latency = latency or np.inf
            nslc.append((net, sta, loc, cha, latency))
        nslc_ = []
        # request and assemble availability information.
        # this takes pretty long (on network/slow file systems),
        # so we only do it during a full run here, not during update
        for net, sta, loc, cha, latency in nslc:
            percentage, gap_count = client.get_availability_percentage(
                net, sta, loc, cha, availability_check_starttime,
                availability_check_endtime)
            nslc_.append((net, sta, loc, cha, latency, percentage, gap_count))
        nslc = nslc_
        # write stream list and availability information to file
        nslc = np.array(sorted(nslc), dtype=dtype_streamfile)
        np.savetxt(streams_file, nslc, delimiter=",",
                   fmt=["%s", "%s", "%s", "%s", "%f", "%f", "%d"])
        # generate obspy-scan image
        files = []
        seed_ids = set()
        for nslc_ in nslc:
            net, sta, loc, cha, latency, _, _ = nslc_
            if np.isinf(latency) or latency > args.check_back_days * 24 * 3600:
                continue
            seed_ids.add(".".join((net, sta, loc, cha)))
            files += client._get_filenames(
                net, sta, loc, cha, availability_check_starttime,
                availability_check_endtime)
        scan(files, format=args.format,
             starttime=availability_check_starttime,
             endtime=availability_check_endtime, plot=scan_file,
             verbose=False, recursive=True, ignore_links=False,
             seed_ids=seed_ids, print_gaps=False)
    # request and assemble current latency information
    data = []
    for net, sta, loc, cha, latency, percentage, gap_count in nslc:
        if args.update:
            latency = client.get_latency(net, sta, loc, cha,
                                         stop_time=stop_time)
            latency = latency or np.inf
        data.append((net, sta, loc, cha, latency, percentage, gap_count))
    # separate out the long dead streams
    data_normal = []
    data_outdated = []
    for data_ in data:
        latency = data_[4]
        if np.isinf(latency) or latency > args.check_back_days * 24 * 3600:
            data_outdated.append(data_)
        else:
            data_normal.append(data_)
    # write html output to file
    html = _format_html(args, data_normal, data_outdated)
    with open(html_file, "wt") as fh:
        fh.write(html)
        parameters.append(parameter)
        with concurrent.futures.ProcessPoolExecutor() as executor:
            streams = executor.map(self._get_waveform, parameters)
        return list(streams)

    def _get_ppsdS(self, streams_info):
        # Returns all the PPSDs for a given list of streams
        with concurrent.futures.ProcessPoolExecutor() as executor:
            ppsdS = executor.map(self._get_ppsd, streams_info)
        # ~ return list(ppsdS)


if __name__ == '__main__':
    client_RSSB = Client("\\\\168.176.35.177\\archive")
    parser_RSSB = Parser(os.path.join(os.getcwd(), 'BT_UNALv4.dataless'))
    SNA = Seismic_Noise_Analysis(client_RSSB, parser_RSSB,
                                 "20170101", "20200508")
    # ~ print(SNA.info_by_channel)
    DIR = os.getcwd()
    SNA.execute(DIR, "RSSBv1")
    # ~ SNA.execute()
    # ~ print(SNA.info)
    # ~ SNA.get_streams(network="BT", station="BUPI", location="00", channel="BHN")
    # ~ SNA.get_streams(UTCDateTime("20171224"), UTCDateTime("20171225"), ["BT.BUPI.00.BH*", "BT.ZIPA.*"])
    # ~ streams = SNA.get_streams(["BT.BUPI.00.*"])
    # ~ print(SNA.get_ppsdS(streams))
def main():
    # Arguments
    argu_parser = argparse.ArgumentParser(
        description="Run the main code to compute quality sheets for a set of STATIONS")
    argu_parser.add_argument(
        "-s", "--stations", nargs='*', required=True,
        help='list of stations name Separate with spaces that must be in the '
             'station_dictionnary file ex: MEUD00 OBP10')
    argu_parser.add_argument(
        "-b", "--starttime", default=UTCDateTime(2000, 1, 1), type=UTCDateTime,
        help="Start time for processing. Various format accepted. "
             "Example : 2012,2,1 / 2012-02-01 / 2012,032 / 2012032 / etc ... "
             "See UTCDateTime for a complete list. Default is 2010-1-1")
    argu_parser.add_argument(
        "-e", "--endtime", default=UTCDateTime(2055, 9, 16), type=UTCDateTime,
        help="End time for processing. Various format accepted. "
             "Example : 2012,2,1 / 2012-02-01 / 2012,032 / 2012032 / etc ... "
             "See UTCDateTime for a complete list. Default is 2015-1-1")
    argu_parser.add_argument(
        "-c", "--channels", nargs='+',
        help="Process only CHANNELS. Do not use this option if you want to "
             "process all available channels indicated in station_dictionnary. "
             "Separate CHANNELS with spaces. No wildcard. Default is all channels")
    argu_parser.add_argument(
        "-pkl", "--path_pkl", default='./PKL',
        help="output directory for pkl files. Default is ./PKL ")
    argu_parser.add_argument(
        "-plt", "--path_plt", default='./PLT',
        help="output directory for plt files. Default is ./PLT ")
    argu_parser.add_argument(
        "-force_paz", default=False, action='store_true',
        help="Use this option if you don't want to use the dataless file "
             "specified in station_dictionnary. Only PAZ response computed "
             "from sensor and digitizer will be used. Use for debug only. "
             "Dataless recommended")
    argu_parser.add_argument(
        "--color_map", default='pqlx',
        help="Color map for PPSD. Default is pqlx")
    args = argu_parser.parse_args()

    # List of stations
    STA = args.stations
    chan_proc = args.channels

    # Time span
    start = args.starttime
    stop = args.endtime

    # Color Map
    if args.color_map == 'pqlx':
        cmap = pqlx
    elif args.color_map == 'viridis_white':
        from obspy.imaging.cm import viridis_white
        cmap = viridis_white
    elif args.color_map == 'viridis_white_r':
        from obspy.imaging.cm import viridis_white_r
        cmap = viridis_white_r
    else:
        cmap = get_cmap(args.color_map)

    # Output Paths
    PATH_PKL = os.path.abspath(args.path_pkl)
    PATH_PLT = os.path.abspath(args.path_plt)

    # Create PKL and PLT directory if they don't exist
    if not os.path.exists(PATH_PKL):
        os.makedirs(PATH_PKL)
    if not os.path.exists(PATH_PLT):
        os.makedirs(PATH_PLT)

    # ----------
    # Check if all stations are in station_dictionnary
    for dict_station_name in STA:
        try:
            dict_station_name
        except:
            print dict_station_name + " is not in station_dictionnary.py"
            exit()
        try:
            eval(dict_station_name)['network']
        except:
            print dict_station_name + " does not have a network in station_dictionnary.py"
            exit()
        try:
            eval(dict_station_name)['station']
        except:
            print dict_station_name + " does not have a station in station_dictionnary.py"
            exit()
        try:
            eval(dict_station_name)['locid']
        except:
            print dict_station_name + " does not have a locid in station_dictionnary.py"
            exit()
        try:
            eval(dict_station_name)['dataless_file']
            # if there is a dataless_file, check if the file exists
            if not os.path.isfile(eval(dict_station_name)['dataless_file']):
                print dict_station_name + " does not have a valid dataless_file in station_dictionnary.py :"
                print eval(dict_station_name)['dataless_file'] + ' does not exist'
                exit()
        except:
            pass
        try:
            eval(dict_station_name)['path_data']
        except:
            print dict_station_name + " does not have a path_data in station_dictionnary.py"
            exit()
        if not os.path.exists(eval(dict_station_name)['path_data']):
            print dict_station_name + " does not have a valid path_data in station_dictionnary.py :"
            print eval(dict_station_name)['path_data'] + ' does not exist'
            exit()

    # Loop over stations
    for dict_station_name in STA:
        net = eval(dict_station_name)['network']
        sta = eval(dict_station_name)['station']
        locid = eval(dict_station_name)['locid']
        sds_path = eval(dict_station_name)['path_data']
        try:
            title_comment = eval(dict_station_name)['title_comment']
        except:
            title_comment = False
            pass
        # use only channels in station_dictionnay
        if isinstance(chan_proc, list):
            chan_proc = np.intersect1d(
                chan_proc, eval(dict_station_name)['channels'])
        else:
            chan_proc = eval(dict_station_name)['channels']

        # Look for a DATALESS
        dataless = None
        if not args.force_paz:
            try:
                dataless = glob.glob(eval(dict_station_name)['dataless_file'])[0]
            except:
                print "No valid dataless found for " + net + "." + sta + "." + locid
            else:
                print "Using dataless file : " + dataless
        paz = None
        if dataless is None:
            # Look for a PAZ
            try:
                sismo = eval((eval(dict_station_name)['sensor'].lower()))
                acq = eval((eval(dict_station_name)['digitizer'].lower()))
            except:
                print "No PAZ found for " + net + "." + sta + "." + locid
            else:
                paz = {'gain': sismo['gain'],
                       'poles': sismo['poles'],
                       'zeros': sismo['zeros'],
                       'sensitivity': sismo['sensitivity'] / acq['lsb']}
                print "PAZ from instruments.py: "
                print paz
        # exit if no dataless nor paz
        if dataless is None and paz is None:
            print "you must provide a dataless file or a sensor and a digitizer from instruments.py"
            exit()
        # exit if dataless AND paz are provided
        if dataless is not None and paz is not None:
            print "you must provide a dataless file or a sensor and a digitizer from instruments.py but not both !"
            exit()

        print "SDS archive is " + str(sds_path)

        # Loop over channels
        for chan in chan_proc:
            # Try to load a pickle file for this sta/net/chan
            filename_pkl = PATH_PKL + '/' + net + "." + \
                sta + "." + locid + "." + chan + ".pkl"
            is_pickle = False
            try:
                pkl_file = open(filename_pkl, 'r')
                quality_check = cPickle.load(pkl_file)
            except:
                print "No pickle file found for " + net + "." + sta + "." + locid + "." + chan + " (looked for " + filename_pkl + ")"
            else:
                print "Use the pickle file : " + filename_pkl
                is_pickle = True
            sds_client = Client(sds_path)
            print "Reading %s.%s.%s.%s in SDS archive from %s to %s" % (net, sta, locid, chan, start, stop)
            all_streams = sds_client.get_waveforms(
                net, sta, locid, chan, start, stop)
            if len(all_streams.traces) == 0:
                print "No data found for %s.%s.%s.%s in SDS archive from %s to %s" % (net, sta, locid, chan, start, stop)
                exit()
            print "Processing data"
            # Initiate the QC
            if is_pickle is False:
                quality_check = QC(all_streams[0].stats, dataless=dataless,
                                   paz=paz, skip_on_gaps=True)
                is_pickle = True
            # Remove the minutes before the next hour (useful when
            # computing statistics per hour)
            # mst = min start time
            mst = min([temp.stats.starttime for temp in all_streams])
            mst = UTCDateTime(mst.year, mst.month, mst.day,
                              mst.hour, 0, 0) + 3600.
            all_streams.trim(starttime=mst, nearest_sample=False)
            # Trim to stop when asked
            all_streams.trim(endtime=stop)
            # Remove the minutes after the last hour (useful when
            # computing statistics per hour)
            # met = max end time
            met = max([temp.stats.endtime for temp in all_streams])
            met = UTCDateTime(met.year, met.month, met.day, met.hour, 0, 0)
            all_streams.trim(endtime=met, nearest_sample=False)
            # Add all streams to QC
            quality_check.add(all_streams)
            # save
            # This can be 2 levels below to save a little bit of time
            # (save and loading)
            if is_pickle:
                print filename_pkl
                quality_check.save(filename_pkl)
                print filename_pkl + " updated"
            else:
                print "!!!!! Nothing saved/created for " + net + "." + sta + "." + locid
            # Plot
            if is_pickle and len(quality_check.times_used) > 0:
                filename_plt = PATH_PLT + '/' + net + "." + \
                    sta + "." + locid + "." + chan + ".png"
                quality_check.plot(cmap=cmap, filename=filename_plt,
                                   show_percentiles=True, starttime=start,
                                   endtime=stop, title_comment=title_comment)
                print filename_plt + " updated"
import matplotlib
import matplotlib.pyplot as plt
from obspy.clients.filesystem.sds import Client
from obspy.core import *
from obspy.signal.trigger import *
from mpl_toolkits import mplot3d
import pandas as pd
from sklearn import cluster

sdsRoot = "/mnt/DATA/DATA"
client = Client(sds_root=sdsRoot)
client.nslc = client.get_all_nslc(sds_type="D")
t = UTCDateTime("201602060356")
stream = Stream()
counter = 0
for net, sta, loc, chan in client.nslc:
    counter += 1
    st = client.get_waveforms(net, sta, loc, chan, t, t + 60)
    try:
        print(net, sta, loc, chan)
        st.traces[0].stats.distance = counter
        stream += st
    except IndexError:
        pass

# stream.normalize()
# stream.detrend()
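# Added note (not from the original script): since every trace above is tagged
# with a ``stats.distance`` attribute, the assembled stream can be drawn as a
# record section; ObsPy's section plot reads ``trace.stats.distance``
# (interpreted in meters). The output filename below is a placeholder.
stream.plot(type='section', outfile='section.png')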
end_time = sys.argv[6]
# print(sys.argv[1])
# print(sys.argv[2])
# print(sys.argv[3])
# print(sys.argv[4])
# print(sys.argv[5])
# print(sys.argv[6])
# print(sys.argv[7])
# print(sys.argv[8])
# print(sys.argv[9])
# exit(0)
dpi = int(sys.argv[7])
sizex = int(sys.argv[8])
sizey = int(sys.argv[9])

client = Client(client_root)  # ("/mnt/ide/seed")
tStart = UTCDateTime(start_time)  # ("2020-06-08T06:30:00.000")
tEnd = UTCDateTime(end_time)  # ("2020-06-08T09:30:00.000")

traces = client.get_waveforms(network, station, "*", channel, tStart, tEnd)
for tr in traces:
    trId = tr.get_id()
    fileName = 'out_' + trId + '.png'
    # os.remove('out_' + trId + '.png')  # TODO: does not delete them, permissions problem
    tr.plot(
        type='dayplot',
        dpi=dpi,
        x_labels_size=int(8 * 100 / int(dpi)),
        y_labels_size=int(8 * 100 / int(dpi)),
from obspy.clients.filesystem.sds import Client

receivers = ['*****@*****.**']
smtp = 'mysmtpserver.mydomain.net'
end = UTCDateTime()
start = end - 3600 * 24 * 30
loc_id_list = ["00", "10"]
chan = "BH"
step = 3600
sds_path = "/SDS"
sds_client = Client(sds_path)
station_list = [
    "AIS", "ATD", "CAN", "CCD", "CLF", "COYC", "CRZF", "DRV", "DZM", "ECH",
    "FDF", "FOMA", "FUTU", "HDC", "INU", "IVI", "KIP", "MBO", "MPG", "NOUC",
    "PAF", "PEL", "PPTF", "RER", "ROCAM", "SANVU", "SPB", "SSB", "TAM",
    "TAOE", "TRIS", "UNM", "WUS"]
t1 = start
t2 = start + step
ratios = {}
nz = []
ez = []
en = []
def main(argv=None):
    MatplotlibBackend.switch_backend("AGG", sloppy=False)
    parser = ArgumentParser(
        prog='obspy-sds-report', description=__doc__,
        formatter_class=RawDescriptionHelpFormatter)
    parser.add_argument(
        '-r', '--sds-root', dest='sds_root', required=True,
        help='Root folder of SDS archive.')
    parser.add_argument(
        '-o', '--output', dest='output', required=True,
        help='Full path (absolute or relative) of output files, without '
             'suffix (e.g. ``/tmp/sds_report``).')
    parser.add_argument(
        '-u', '--update', dest='update', default=False, action="store_true",
        help='Only update latency information, reuse previously computed list '
             'of streams to check and data percentage and gap count. Many '
             'other options e.g. regarding stream selection (``--id``, '
             '``--location``, ..) and time span of data quality checks '
             '(``--check-quality-days``) will be without effect if this '
             'option is specified. Only updating latency is significantly '
             'faster than a full analysis run, a normal use case is to do a '
             'full run once or twice per day and update latency every 5 or '
             'ten minutes. An exception is raised if an update is specified '
             'but the necessary file is not yet present.')
    parser.add_argument(
        '-l', '--location', dest='locations', action="append",
        help='Location codes to look for (e.g. ``""`` for empty location code '
             'or ``"00"``). This option can be provided multiple times and '
             'must be specified at least once for a full run (i.e. without '
             '``--update`` option). While network/station combinations are '
             'automatically discovered, only streams whose location codes are '
             'provided here will be discovered and taken into account and '
             'ultimately displayed.')
    parser.add_argument(
        '-c', '--channel', dest='channels', action="append",
        help='Channel codes to look for (e.g. specified three times with '
             '``HHZ``, ``EHZ`` and ``ELZ`` to cover all stations that serve '
             'a broad-band, short-period or low gain vertical channel). '
             'This option can be provided multiple times and must be '
             'specified at least once for a full run (i.e. without '
             '``--update`` option). Only one stream per '
             'network/station/location combination will be displayed, '
             'selected by the lowest latency.')
    parser.add_argument(
        '-i', '--id', dest='ids', action="append", default=[],
        help='SEED IDs of streams that should be included in addition to the '
             'autodiscovery of streams controlled by ``--location`` and '
             '``--channel`` options (e.g. ``IU.ANMO..LHZ``). '
             'This option can be provided multiple times.')
    parser.add_argument(
        '--skip', dest='skip', action="append", default=[],
        help='Networks or stations that should be skipped (e.g. ``IU`` or '
             '``IU.ANMO``). This option can be provided multiple times.')
    parser.add_argument(
        '-f', '--format', default="MSEED", choices=ENTRY_POINTS['waveform'],
        help='Waveform format of SDS archive. Should be "MSEED" in most '
             'cases. Use ``None`` or empty string for format autodetection '
             '(slower and should not be necessary in most all cases). '
             'Warning: formats that do not support ``headonly`` '
             'option in ``read()`` operation will be significantly slower.')
    parser.add_argument(
        '--check-backwards-days', dest='check_back_days', default=30,
        type=int, help='Check for latency backwards for this many days.')
    parser.add_argument(
        '--check-quality-days', dest='check_quality_days', default=7,
        type=int, help='Calculate and plot data availability and number of '
                       'gaps for a period of this many days.')
    parser.add_argument(
        '--latency-warn', dest='latency_warn', default=3600, type=float,
        help='Latency warning threshold in seconds.')
    parser.add_argument(
        '--latency-warn-color', dest='latency_warn_color', default="#FFFF33",
        help='Latency warning threshold color (valid HTML color string).')
    parser.add_argument(
        '--latency-error', dest='latency_error', default=24 * 3600,
        type=float, help='Latency error threshold in seconds.')
    parser.add_argument(
        '--latency-error-color', dest='latency_error_color',
        default="#E41A1C",
        help='Latency error threshold color (valid HTML color string).')
    parser.add_argument(
        '--percentage-warn', dest='percentage_warn', default=99.5, type=float,
        help='Data availability percentage warning threshold (``0`` to '
             '``100``).')
    parser.add_argument(
        '--gaps-warn', dest='gaps_warn', default=20, type=int,
        help='Gap/overlap number warning threshold.')
    parser.add_argument(
        '--data-quality-warn-color', dest='data_quality_warn_color',
        default="#377EB8",
        help='Data quality (percentage/gap count) warning color '
             '(valid HTML color string).')
    parser.add_argument(
        '--outdated-color', dest='outdated_color', default="#808080",
        help='Color for streams that have no data in check range '
             '(valid HTML color string).')
    parser.add_argument(
        '--ok-color', dest='ok_color', default="#4DAF4A",
        help='Color for streams that pass all checks (valid HTML color '
             'string).')
    parser.add_argument(
        '--background-color', dest='background_color', default="#999999",
        help='Color for background of page (valid HTML color string).')
    parser.add_argument(
        '-V', '--version', action='version',
        version='%(prog)s ' + __version__)
    args = parser.parse_args(argv)
    now = UTCDateTime()
    stop_time = now - args.check_back_days * 24 * 3600
    client = Client(args.sds_root)
    dtype_streamfile = np.dtype("U10, U30, U10, U10, f8, f8, i8")
    availability_check_endtime = now - 3600
    availability_check_starttime = (
        availability_check_endtime - (args.check_quality_days * 24 * 3600))
    streams_file = args.output + ".txt"
    html_file = args.output + ".html"
    scan_file = args.output + ".png"
    if args.format.upper() == "NONE" or args.format == "":
        args.format = None
    # check whether to set up list of streams to check or use existing list
    # update list of streams once per day at nighttime
    if args.update:
        if not Path(streams_file).is_file():
            msg = ("Update flag specified, but no output of previous full run "
                   "was present in the expected location (as determined by "
                   "``--output`` flag: {})").format(streams_file)
            raise IOError(msg)
        # use existing list of streams and availability information, just
        # update latency
        nslc = np.loadtxt(streams_file, delimiter=",", dtype=dtype_streamfile)
    else:
        if not args.locations or not args.channels:
            msg = ("At least one location code ``--location`` and at least "
                   "one channel code ``--channel`` must be specified.")
            raise ObsPyException(msg)
        nsl = set()
        # get all network/station combinations in SDS archive
        for net, sta in client.get_all_stations():
            if net in args.skip or ".".join((net, sta)) in args.skip:
                continue
            # for all combinations of user specified location and channel
            # codes check if data is in SDS archive
            for loc in args.locations:
                for cha in args.channels:
                    if client.has_data(net, sta, loc, cha):
                        # for now omit channel information, we only include
                        # the channel with lowest latency later on
                        nsl.add((net, sta, loc))
                        break
        nsl = sorted(nsl)
        nslc = []
        # determine which channel to check for each network/station/location
        # combination
        for net, sta, loc in nsl:
            latency = []
            # check latency of all channels that should be checked
            for cha in args.channels:
                latency_ = client.get_latency(
                    net, sta, loc, cha, stop_time=stop_time,
                    check_has_no_data=False)
                latency.append(latency_ or np.inf)
            # only include the channel with lowest latency in our stream list
            cha = args.channels[np.argmin(latency)]
            latency = np.min(latency)
            nslc.append((net, sta, loc, cha, latency))
        for id in args.ids:
            net, sta, loc, cha = id.split(".")
            latency = client.get_latency(
                net, sta, loc, cha, stop_time=stop_time,
                check_has_no_data=False)
            latency = latency or np.inf
            nslc.append((net, sta, loc, cha, latency))
        nslc_ = []
        # request and assemble availability information.
        # this takes pretty long (on network/slow file systems),
        # so we only do it during a full run here, not during update
        for net, sta, loc, cha, latency in nslc:
            percentage, gap_count = client.get_availability_percentage(
                net, sta, loc, cha, availability_check_starttime,
                availability_check_endtime)
            nslc_.append((net, sta, loc, cha, latency, percentage, gap_count))
        nslc = nslc_
        # write stream list and availability information to file
        nslc = np.array(sorted(nslc), dtype=dtype_streamfile)
        np.savetxt(streams_file, nslc, delimiter=",",
                   fmt=["%s", "%s", "%s", "%s", "%f", "%f", "%d"])
        # generate obspy-scan image
        files = []
        seed_ids = set()
        for nslc_ in nslc:
            net, sta, loc, cha, latency, _, _ = nslc_
            if np.isinf(latency) or latency > args.check_back_days * 24 * 3600:
                continue
            seed_ids.add(".".join((net, sta, loc, cha)))
            files += client._get_filenames(
                net, sta, loc, cha, availability_check_starttime,
                availability_check_endtime)
        scan(files, format=args.format,
             starttime=availability_check_starttime,
             endtime=availability_check_endtime, plot=scan_file,
             verbose=False, recursive=True, ignore_links=False,
             seed_ids=seed_ids, print_gaps=False)
    # request and assemble current latency information
    data = []
    for net, sta, loc, cha, latency, percentage, gap_count in nslc:
        if args.update:
            latency = client.get_latency(
                net, sta, loc, cha, stop_time=stop_time,
                check_has_no_data=False)
            latency = latency or np.inf
        data.append((net, sta, loc, cha, latency, percentage, gap_count))
    # separate out the long dead streams
    data_normal = []
    data_outdated = []
    for data_ in data:
        latency = data_[4]
        if np.isinf(latency) or latency > args.check_back_days * 24 * 3600:
            data_outdated.append(data_)
        else:
            data_normal.append(data_)
    # write html output to file
    html = _format_html(args, data_normal, data_outdated)
    with open(html_file, "wt") as fh:
        fh.write(html)
def test_read_from_sds(self):
    """
    Test reading data across year and day breaks from SDS directory
    structure. Also tests looking for data on the wrong side of a day
    break (data usually get written for some seconds into the previous or
    next file around midnight).
    """
    # test for one specific SEED ID, without wildcards
    seed_id = "AB.XYZ..HHZ"
    net, sta, loc, cha = seed_id.split(".")
    # use three different day breaks in the testing:
    #  - normal day break during one year
    #    (same directory, separate filenames)
    #  - day break at end of year
    #    (separate directories, separate filenames)
    #      - leap-year
    #      - non-leap-year
    for year, doy in ((2015, 123), (2015, 1), (2012, 1)):
        t = UTCDateTime("%d-%03dT00:00:00" % (year, doy))
        with TemporarySDSDirectory(year=year, doy=doy) as temp_sds:
            # normal test reading across the day break
            client = Client(temp_sds.tempdir)
            st = client.get_waveforms(net, sta, loc, cha, t - 20, t + 20)
            self.assertEqual(len(st), 1)
            self.assertEqual(st[0].stats.starttime, t - 20)
            self.assertEqual(st[0].stats.endtime, t + 20)
            self.assertEqual(len(st[0]), 5)
            # test merge option
            st = client.get_waveforms(net, sta, loc, cha, t - 200, t + 200,
                                      merge=False)
            self.assertEqual(len(st), 2)
            st = client.get_waveforms(net, sta, loc, cha, t - 200, t + 200,
                                      merge=None)
            self.assertEqual(len(st), 2)
            st = client.get_waveforms(net, sta, loc, cha, t - 200, t + 200,
                                      merge=0)
            self.assertEqual(len(st), 1)
            # test reading data from a single day file
            # (data is in the file where it's expected)
            st = client.get_waveforms(net, sta, loc, cha, t - 80, t - 30)
            self.assertEqual(len(st), 1)
            # test reading data from a single day file
            # (data is in the dayfile of the previous day)
            st = client.get_waveforms(net, sta, loc, cha, t + 20, t + 40)
            self.assertEqual(len(st), 1)
            # test that format autodetection with `format=None` works
            client = Client(temp_sds.tempdir, format=None)
            st = client.get_waveforms(net, sta, loc, cha, t - 200, t + 200)
            self.assertEqual(len(st), 1)
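# Added usage sketch (not part of the test above): the ``merge`` behaviour
# exercised by the test reads the same way against a real archive. The SDS
# path, SEED codes and request time below are placeholders.
from obspy import UTCDateTime
from obspy.clients.filesystem.sds import Client

client = Client("/path/to/SDS")
t = UTCDateTime("2015-05-03T00:00:00")
# per the test above: merge=0 returns one stitched trace across the midnight
# file break, merge=False (or merge=None) keeps the two pieces separate
st_merged = client.get_waveforms("AB", "XYZ", "", "HHZ", t - 200, t + 200, merge=0)
st_split = client.get_waveforms("AB", "XYZ", "", "HHZ", t - 200, t + 200, merge=False)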
def read_traces(config):
    basepath = config.data_dir
    if config.data_day:
        basepath = os.path.join(basepath, config.data_day)
        if config.data_hours:
            basepath = os.path.join(basepath, config.data_hours)
    kwargs = {}
    if config.data_format:
        kwargs['format'] = config.data_format
    tmpst = Stream()
    if config.dataarchive_type == 'SDS':
        client = Client(basepath)
        start_t = UTCDateTime(config.start_time)
        end_t = UTCDateTime(config.end_time)
        tmpst = client.get_waveforms(config.data_network, "*", "*", '*',
                                     start_t, end_t)
    else:
        for filename in glob(os.path.join(basepath, '*')):
            try:
                if config.start_time:
                    tmpst += read(filename,
                                  starttime=UTCDateTime(config.start_time),
                                  endtime=UTCDateTime(config.end_time),
                                  **kwargs)
                else:
                    tmpst += read(filename, **kwargs)
            except Exception:
                continue

    # Get the intersection between the list of available stations
    # and the list of requested stations:
    tmpst_select = Stream()
    for ch in config.channel:
        tmpst_select += tmpst.select(channel=ch)
    tmpst_stations = [tr.stats.station for tr in tmpst_select]
    stations = sorted(set(tmpst_stations) & set(config.stations))

    # Retain only requested channel and stations:
    st = Stream(tr for tr in tmpst_select if tr.stats.station in stations)
    if not st:
        print('Could not read any trace!')
        sys.exit(1)
    st.sort()

    # Check sampling rate
    config.delta = None
    for tr in st:
        tr.detrend(type='constant')
        tr.taper(type='hann', max_percentage=0.005, side='left')
        sampling_rate = tr.stats.sampling_rate
        # Resample data, if requested
        if config.sampl_rate_data:
            if sampling_rate >= config.sampl_rate_data:
                dec_ct = int(sampling_rate / config.sampl_rate_data)
                tr.decimate(dec_ct, strict_length=False, no_filter=True)
            else:
                raise ValueError(
                    'Sampling frequency for trace %s is lower than %s'
                    % (tr.id, config.sampl_rate_data))
        delta = tr.stats.delta
        if config.delta is None:
            config.delta = delta
        else:
            if delta != config.delta:
                raise ValueError(
                    'Trace %s has different delta: %s (expected: %s)'
                    % (tr.id, delta, config.delta))
    # Recompute sampling rate after resampling
    config.sampl_rate_data = st[0].stats.sampling_rate
    print('Number of traces in stream = ', len(st))

    # Check for common starttime and endtime of the traces
    st_starttime = max([tr.stats.starttime for tr in st])
    st_endtime = min([tr.stats.endtime for tr in st])
    if config.start_time:
        st.trim(max(st_starttime, UTCDateTime(config.start_time)),
                min(st_endtime, UTCDateTime(config.end_time)))
    else:
        st.trim(st_starttime, st_endtime)

    # --- cut the data to the selected length dt ------------------------------
    if config.cut_data:
        st.trim(st[0].stats.starttime + config.cut_start,
                st[0].stats.starttime + config.cut_start + config.cut_delta)
    else:
        config.cut_start = 0.
    config.starttime = st[0].stats.starttime

    # attach station list and trace ids to config file
    config.stations = stations
    config.trids = [tr.id for tr in st]
    return st