def find_seismogram(top_level_dir, starttime, endtime,
                    stations=['S12', 'S14', 'S15', 'S16'],
                    channels=['MH1', 'MH2', 'MHZ'],
                    dir_type='pdart_dir'):

    for station in stations:
        stream = Stream()
        channel = '*'
        if dir_type == 'processed_dir':
            dir = find_processed_dir(top_level_dir, starttime.year, station)
            filename = '*%s.%s.%s.%s.%s.%03d*.gz' % (
                'XA', station, '*', channel, str(starttime.year),
                starttime.julday)
        else:
            dir = find_dir(top_level_dir, starttime.year, station, channel)
            filename = '%s.%s.%s.%s.%s.%03d.gz' % (
                'XA', station, '*', channel, str(starttime.year),
                starttime.julday)
        filename = os.path.join(dir, filename)
        try:
            stream += read(filename)
        except Exception as e:
            print(str(e))

        # If the request spans midnight, also read the file for the end day.
        if starttime.julday != endtime.julday:
            if dir_type == 'processed_dir':
                dir = find_processed_dir(top_level_dir, endtime.year, station)
            else:
                dir = find_dir(top_level_dir, endtime.year, station, channel)
            filename = '*%s.%s.%s.%s.%s.%03d*.gz' % (
                'XA', station, '*', channel, str(endtime.year),
                endtime.julday)
            filename = os.path.join(dir, filename)
            try:
                stream += read(filename)
            except Exception as e:
                print(str(e))

        # print('Before ', stream)
        stream = stream.trim(starttime=starttime, endtime=endtime)
        # print('After ', stream)

        if stream is not None and len(stream) > 0:
            # iterate over a copy of the trace list: removing traces while
            # iterating the stream directly would skip elements
            for tr in list(stream):
                tr.stats.location = ''
                if tr.stats.channel not in channels:
                    stream.remove(tr)
            stream.merge()

    return stream
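# A minimal usage sketch for find_seismogram. It assumes the PDART-style
# directory layout and the find_dir/find_processed_dir helpers from the same
# module; the archive path and times below are hypothetical placeholders.
from obspy import UTCDateTime

starttime = UTCDateTime('1971-02-07T00:45:00')
endtime = starttime + 3600  # one hour of data

stream = find_seismogram('/data/pdart', starttime, endtime,
                         stations=['S12'], channels=['MHZ'])
print(stream)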
def get_streams_gema(networks, stations, starttime, endtime,
                     only_vertical_channel=False, local_dir_name=None):
    if not local_dir_name:
        local_dir_name = "%s/archive" % (os.getenv("HOME"))

    if only_vertical_channel:
        channels = "*Z"
    else:
        channels = "*"

    # READ ARCHIVE DATABASE
    st = Stream()
    this_day = UTCDateTime(starttime.strftime("%Y-%m-%d"))
    last_day = UTCDateTime(endtime.strftime("%Y-%m-%d"))
    while this_day <= last_day:
        for network, station in zip(networks, stations):
            pattern = '%s/%s/%s/%s/%s*' % (local_dir_name,
                                           this_day.strftime("%Y"),
                                           network, station, channels)
            paths_ch = sorted(glob.glob(pattern))
            for path in paths_ch:
                pattern = "%s/*%s" % (path, this_day.strftime("%Y.%03j"))
                msfile_list = glob.glob(pattern)
                if len(msfile_list) > 0:
                    for msfile in msfile_list:
                        st += read(msfile, starttime=starttime,
                                   endtime=endtime)
        this_day += 86400

    # PATCH PROBLEM: DIFFERENT SAMPLING RATES IN LONQ STATION FROM SCREAM
    for tr in st.select(station="LONQ"):
        if tr.stats.sampling_rate != 50:
            st.remove(tr)

    # EXPORT GAPS AND MERGE STREAM
    gaps = st.get_gaps()
    if len(st) > 0:  # and len(gaps) > 0
        st.trim(starttime, endtime)
        st.merge(method=1, interpolation_samples=-1, fill_value='interpolate')

    return st, gaps
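# A hedged usage sketch: fetch a day of vertical-component data from the
# SeisComP-style archive under $HOME/archive. The network code is a
# placeholder; "LONQ" is the station name the function itself special-cases.
from obspy import UTCDateTime

st, gaps = get_streams_gema(networks=["C1"], stations=["LONQ"],
                            starttime=UTCDateTime("2020-01-01T12:00:00"),
                            endtime=UTCDateTime("2020-01-02T12:00:00"),
                            only_vertical_channel=True)
print(st)
print("%d gaps found" % len(gaps))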
def preprocess(db, stations, comps, goal_day, params, responses=None):
    """
    Fetches data for each ``stations`` and each ``comps`` using the
    data_availability table in the database.

    To correct for instrument responses, make sure to set ``remove_response``
    to "Y" in the config and to provide the ``responses`` DataFrame.

    :Example:

    >>> from msnoise.api import connect, get_params, preload_instrument_responses
    >>> from msnoise.preprocessing import preprocess
    >>> db = connect()
    >>> params = get_params(db)
    >>> responses = preload_instrument_responses(db)
    >>> st = preprocess(db, ["YA.UV06","YA.UV10"], ["Z",], "2010-09-01", params, responses)
    >>> st
    2 Trace(s) in Stream:
    YA.UV06.00.HHZ | 2010-09-01T00:00:00.000000Z - 2010-09-01T23:59:59.950000Z | 20.0 Hz, 1728000 samples
    YA.UV10.00.HHZ | 2010-09-01T00:00:00.000000Z - 2010-09-01T23:59:59.950000Z | 20.0 Hz, 1728000 samples

    :type db: :class:`sqlalchemy.orm.session.Session`
    :param db: A :class:`~sqlalchemy.orm.session.Session` object, as
        obtained by :func:`msnoise.api.connect`.
    :type stations: list of str
    :param stations: a list of station names, in the format NET.STA.
    :type comps: list of str
    :param comps: a list of component names, in Z,N,E,1,2.
    :type goal_day: str
    :param goal_day: the day of data to load, ISO 8601 format: e.g. 2016-12-31.
    :type params: class
    :param params: an object containing the config parameters, as obtained by
        :func:`msnoise.api.get_params`.
    :type responses: :class:`pandas.DataFrame`
    :param responses: a DataFrame containing the instrument responses, as
        obtained by :func:`msnoise.api.preload_instrument_responses`.
    :rtype: :class:`obspy.core.stream.Stream`
    :return: A Stream object containing all traces.
    """
    datafiles = {}
    output = Stream()
    for station in stations:
        datafiles[station] = {}
        net, sta = station.split('.')
        gd = datetime.datetime.strptime(goal_day, '%Y-%m-%d')
        files = get_data_availability(db, net=net, sta=sta,
                                      starttime=gd, endtime=gd)
        for comp in comps:
            datafiles[station][comp] = []
        for file in files:
            if file.comp[-1] not in comps:
                continue
            fullpath = os.path.join(file.path, file.file)
            datafiles[station][file.comp[-1]].append(fullpath)

    for istation, station in enumerate(stations):
        net, sta = station.split(".")
        for comp in comps:
            files = datafiles[station][comp]  # direct lookup instead of eval()
            if len(files) != 0:
                logging.debug("%s.%s Reading %i Files" %
                              (station, comp, len(files)))
                stream = Stream()
                for file in sorted(files):
                    try:
                        # "dytpe" was a typo for "dtype"; np.float is removed
                        # in recent numpy, use np.float64
                        st = read(file, dtype=np.float64,
                                  starttime=UTCDateTime(gd),
                                  endtime=UTCDateTime(gd) + 86400)
                    except Exception:
                        logging.debug("ERROR reading file %s" % file)
                        continue
                    for tr in st:
                        if len(tr.stats.channel) == 2:
                            tr.stats.channel += tr.stats.location
                            tr.stats.location = "00"
                    tmp = st.select(network=net, station=sta, component=comp)
                    if not len(tmp):
                        for tr in st:
                            tr.stats.network = net
                        st = st.select(network=net, station=sta,
                                       component=comp)
                    else:
                        st = tmp
                    for tr in st:
                        tr.data = tr.data.astype(np.float64)
                        tr.stats.network = tr.stats.network.upper()
                        tr.stats.station = tr.stats.station.upper()
                        tr.stats.channel = tr.stats.channel.upper()
                    stream += st
                    del st
                stream.sort()
                try:
                    # HACK not super clean... should find a way to prevent the
                    # same trace id with different sps to occur
                    stream.merge(method=1, interpolation_samples=3,
                                 fill_value=None)
                except Exception:
                    logging.debug("Error while merging...")
                    traceback.print_exc()
                    continue
                stream = stream.split()
                if not len(stream):
                    continue
                logging.debug("%s Checking sample alignment" % stream[0].id)
                for i, trace in enumerate(stream):
                    stream[i] = check_and_phase_shift(trace)

                logging.debug("%s Checking Gaps" % stream[0].id)
                if len(getGaps(stream)) > 0:
                    max_gap = params.preprocess_max_gap * \
                        stream[0].stats.sampling_rate
                    only_too_long = False
                    while getGaps(stream) and not only_too_long:
                        too_long = 0
                        gaps = getGaps(stream)
                        for gap in gaps:
                            if int(gap[-1]) <= max_gap:
                                try:
                                    stream[gap[0]] = stream[gap[0]].__add__(
                                        stream[gap[1]], method=1,
                                        fill_value="interpolate")
                                    stream.remove(stream[gap[1]])
                                except Exception:
                                    stream.remove(stream[gap[1]])
                                break
                            else:
                                too_long += 1
                        if too_long == len(gaps):
                            only_too_long = True

                stream = stream.split()
                # iterate over copies: traces may be removed mid-loop
                for tr in list(stream):
                    if tr.stats.sampling_rate < (params.goal_sampling_rate - 1):
                        stream.remove(tr)
                taper_length = 20.0  # seconds
                for trace in list(stream):
                    if trace.stats.npts < 4 * taper_length * \
                            trace.stats.sampling_rate:
                        stream.remove(trace)
                    else:
                        trace.detrend(type="demean")
                        trace.detrend(type="linear")
                        trace.taper(max_percentage=None, max_length=1.0)

                if not len(stream):
                    logging.debug(" has only too small traces, skipping...")
                    continue

                for trace in stream:
                    logging.debug("%s Highpass at %.2f Hz" %
                                  (trace.id, params.preprocess_highpass))
                    trace.filter("highpass", freq=params.preprocess_highpass,
                                 zerophase=True)

                    if trace.stats.sampling_rate != params.goal_sampling_rate:
                        logging.debug("%s Lowpass at %.2f Hz" %
                                      (trace.id, params.preprocess_lowpass))
                        trace.filter("lowpass",
                                     freq=params.preprocess_lowpass,
                                     zerophase=True, corners=8)

                        if params.resampling_method == "Resample":
                            logging.debug("%s Downsample to %.1f Hz" %
                                          (trace.id, params.goal_sampling_rate))
                            trace.data = resample(
                                trace.data,
                                params.goal_sampling_rate /
                                trace.stats.sampling_rate,
                                'sinc_fastest')
                        elif params.resampling_method == "Decimate":
                            decimation_factor = trace.stats.sampling_rate / \
                                params.goal_sampling_rate
                            if not int(decimation_factor) == decimation_factor:
                                logging.warning(
                                    "%s CANNOT be decimated by an integer "
                                    "factor, consider using Resample or "
                                    "Lanczos methods. Trace sampling rate = "
                                    "%i ; Desired CC sampling rate = %i" %
                                    (trace.id, trace.stats.sampling_rate,
                                     params.goal_sampling_rate))
                                sys.stdout.flush()
                                sys.exit()
                            logging.debug("%s Decimate by a factor of %i" %
                                          (trace.id, decimation_factor))
                            trace.data = trace.data[::int(decimation_factor)]
                        elif params.resampling_method == "Lanczos":
                            logging.debug("%s Downsample to %.1f Hz" %
                                          (trace.id, params.goal_sampling_rate))
                            trace.data = np.array(trace.data)
                            trace.interpolate(
                                method="lanczos",
                                sampling_rate=params.goal_sampling_rate,
                                a=1.0)
                        trace.stats.sampling_rate = params.goal_sampling_rate

                if params.remove_response:
                    logging.debug('%s Removing instrument response' %
                                  stream[0].id)
                    response = responses[responses["channel_id"] ==
                                         stream[0].id]
                    if len(response) > 1:
                        response = response[response["start_date"] <=
                                            UTCDateTime(gd)]
                    if len(response) > 1:
                        response = response[response["end_date"] >=
                                            UTCDateTime(gd)]
                    elif len(response) == 0:
                        logging.info("No instrument response information "
                                     "for %s, skipping" % stream[0].id)
                        continue
                    try:
                        datalesspz = response["paz"].values[0]
                    except Exception:
                        logging.error("Bad instrument response information "
                                      "for %s, skipping" % stream[0].id)
                        continue
                    stream.simulate(paz_remove=datalesspz,
                                    remove_sensitivity=True,
                                    pre_filt=params.response_prefilt,
                                    paz_simulate=None)

                for tr in stream:
                    tr.data = tr.data.astype(np.float32)
                output += stream
                del stream
        del files
    clean_scipy_cache()
    return output
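# A standalone sketch of the gap-tolerance idea used in the loop above,
# written against ObsPy's built-in Trace.__add__ rather than msnoise's
# getGaps() helper: gaps shorter than max_gap_seconds between consecutive
# traces of the same id are bridged by interpolation; longer gaps are kept.
import numpy as np
from obspy import Trace, Stream, UTCDateTime

def patch_short_gaps(stream, max_gap_seconds=10.0):
    """Bridge short gaps between consecutive same-id traces."""
    stream = stream.copy()
    stream.sort(keys=["network", "station", "location", "channel",
                      "starttime"])
    merged = Stream()
    for tr in stream:
        if len(merged) and merged[-1].id == tr.id:
            gap = tr.stats.starttime - merged[-1].stats.endtime
            if 0 < gap <= max_gap_seconds:
                # method=1 with fill_value="interpolate" fills the gap linearly
                merged[-1] = merged[-1].__add__(tr, method=1,
                                                fill_value="interpolate")
                continue
        merged += tr
    return merged

tr1 = Trace(np.ones(100), header={"starttime": UTCDateTime(0),
                                  "sampling_rate": 1.0})
tr2 = Trace(np.ones(100), header={"starttime": UTCDateTime(105),
                                  "sampling_rate": 1.0})
print(patch_short_gaps(Stream([tr1, tr2])))  # 6 s gap -> one merged trace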
def preprocess(db, stations, comps, goal_day, params, responses=None):
    datafiles = {}
    output = Stream()
    for station in stations:
        datafiles[station] = {}
        net, sta = station.split('.')
        gd = datetime.datetime.strptime(goal_day, '%Y-%m-%d')
        files = get_data_availability(db, net=net, sta=sta,
                                      starttime=gd, endtime=gd)
        for comp in comps:
            datafiles[station][comp] = []
        for file in files:
            if file.comp[-1] not in comps:
                continue
            fullpath = os.path.join(file.path, file.file)
            datafiles[station][file.comp[-1]].append(fullpath)

    for istation, station in enumerate(stations):
        net, sta = station.split(".")
        for comp in comps:
            files = datafiles[station][comp]  # direct lookup instead of eval()
            if len(files) != 0:
                logging.debug("%s.%s Reading %i Files" %
                              (station, comp, len(files)))
                stream = Stream()
                for file in sorted(files):
                    # "dytpe" was a typo for "dtype"; np.float is removed in
                    # recent numpy, use np.float64
                    st = read(file, dtype=np.float64,
                              starttime=UTCDateTime(gd),
                              endtime=UTCDateTime(gd) + 86400)
                    tmp = st.select(network=net, station=sta, component=comp)
                    if not len(tmp):
                        for tr in st:
                            tr.stats.network = net
                        st = st.select(network=net, station=sta,
                                       component=comp)
                    else:
                        st = tmp
                    for tr in st:
                        tr.data = tr.data.astype(np.float64)
                    stream += st
                    del st
                stream.sort()
                stream.merge(method=1, interpolation_samples=3,
                             fill_value=None)
                stream = stream.split()

                logging.debug("Checking sample alignment")
                for i, trace in enumerate(stream):
                    stream[i] = check_and_phase_shift(trace)

                logging.debug("Checking Gaps")
                if len(getGaps(stream)) > 0:
                    max_gap = 10
                    only_too_long = False
                    while getGaps(stream) and not only_too_long:
                        too_long = 0
                        gaps = getGaps(stream)
                        for gap in gaps:
                            if int(gap[-1]) <= max_gap:
                                stream[gap[0]] = stream[gap[0]].__add__(
                                    stream[gap[1]], method=1,
                                    fill_value="interpolate")
                                stream.remove(stream[gap[1]])
                                break
                            else:
                                too_long += 1
                        if too_long == len(gaps):
                            only_too_long = True

                stream = stream.split()
                taper_length = 20.0  # seconds
                # iterate over a copy: traces may be removed mid-loop
                for trace in list(stream):
                    if trace.stats.npts < 4 * taper_length * \
                            trace.stats.sampling_rate:
                        stream.remove(trace)
                    else:
                        trace.detrend(type="demean")
                        trace.detrend(type="linear")
                        trace.taper(max_percentage=None, max_length=1.0)

                if not len(stream):
                    logging.debug(" has only too small traces, skipping...")
                    continue

                for trace in stream:
                    logging.debug("%s.%s Highpass at %.2f Hz" %
                                  (station, comp, params.preprocess_highpass))
                    trace.filter("highpass", freq=params.preprocess_highpass,
                                 zerophase=True)

                    if trace.stats.sampling_rate != params.goal_sampling_rate:
                        logging.debug("%s.%s Lowpass at %.2f Hz" %
                                      (station, comp,
                                       params.preprocess_lowpass))
                        trace.filter("lowpass",
                                     freq=params.preprocess_lowpass,
                                     zerophase=True, corners=8)

                        if params.resampling_method == "Resample":
                            logging.debug("%s.%s Downsample to %.1f Hz" %
                                          (station, comp,
                                           params.goal_sampling_rate))
                            trace.data = resample(
                                trace.data,
                                params.goal_sampling_rate /
                                trace.stats.sampling_rate,
                                'sinc_fastest')
                        elif params.resampling_method == "Decimate":
                            decimation_factor = trace.stats.sampling_rate / \
                                params.goal_sampling_rate
                            if not int(decimation_factor) == decimation_factor:
                                logging.warning(
                                    "%s.%s CANNOT be decimated by an integer "
                                    "factor, consider using Resample or "
                                    "Lanczos methods. Trace sampling rate = "
                                    "%i ; Desired CC sampling rate = %i" %
                                    (station, comp,
                                     trace.stats.sampling_rate,
                                     params.goal_sampling_rate))
                                sys.stdout.flush()
                                sys.exit()
                            logging.debug("%s.%s Decimate by a factor of %i" %
                                          (station, comp, decimation_factor))
                            trace.data = trace.data[::int(decimation_factor)]
                        elif params.resampling_method == "Lanczos":
                            logging.debug("%s.%s Downsample to %.1f Hz" %
                                          (station, comp,
                                           params.goal_sampling_rate))
                            trace.data = np.array(trace.data)
                            trace.interpolate(
                                method="lanczos",
                                sampling_rate=params.goal_sampling_rate,
                                a=1.0)
                        trace.stats.sampling_rate = params.goal_sampling_rate

                if get_config(db, 'remove_response', isbool=True):
                    logging.debug('%s Removing instrument response' %
                                  stream[0].id)
                    response_prefilt = eval(get_config(db, 'response_prefilt'))
                    response = responses[responses["channel_id"] ==
                                         stream[0].id]
                    if len(response) > 1:
                        response = response[response["start_date"] <
                                            UTCDateTime(gd)]
                        response = response[response["end_date"] >
                                            UTCDateTime(gd)]
                    elif len(response) == 0:
                        logging.info("No instrument response information "
                                     "for %s, exiting" % stream[0].id)
                        sys.exit()
                    datalesspz = response["paz"].values[0]
                    stream.simulate(paz_remove=datalesspz,
                                    remove_sensitivity=True,
                                    pre_filt=response_prefilt,
                                    paz_simulate=None)

                for tr in stream:
                    tr.data = tr.data.astype(np.float32)
                output += stream
                del stream
        del files
    clean_scipy_cache()
    return 0, output
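# The "Decimate" branch above slices every Nth sample after an explicit
# lowpass, which only works when the rate ratio is an integer. The check in
# isolation, as a small self-contained helper (name is illustrative):
def integer_decimation_factor(sampling_rate, goal_sampling_rate):
    """Return the integer decimation factor, or None if not integral."""
    factor = sampling_rate / goal_sampling_rate
    return int(factor) if int(factor) == factor else None

assert integer_decimation_factor(100.0, 20.0) == 5
assert integer_decimation_factor(50.0, 20.0) is None  # 2.5 is not integral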
def preprocess(db, stations, comps, goal_day, params, responses=None):
    """
    Fetches data for each ``stations`` and each ``comps`` using the
    data_availability table in the database.

    To correct for instrument responses, make sure to set ``remove_response``
    to "Y" in the config and to provide the ``responses`` DataFrame.

    :Example:

    >>> from msnoise.api import connect, get_params, preload_instrument_responses
    >>> from msnoise.preprocessing import preprocess
    >>> db = connect()
    >>> params = get_params(db)
    >>> responses = preload_instrument_responses(db)
    >>> st = preprocess(db, ["YA.UV06","YA.UV10"], ["Z",], "2010-09-01", params, responses)
    >>> st
    2 Trace(s) in Stream:
    YA.UV06.00.HHZ | 2010-09-01T00:00:00.000000Z - 2010-09-01T23:59:59.950000Z | 20.0 Hz, 1728000 samples
    YA.UV10.00.HHZ | 2010-09-01T00:00:00.000000Z - 2010-09-01T23:59:59.950000Z | 20.0 Hz, 1728000 samples

    :type db: :class:`sqlalchemy.orm.session.Session`
    :param db: A :class:`~sqlalchemy.orm.session.Session` object, as
        obtained by :func:`msnoise.api.connect`.
    :type stations: list of str
    :param stations: a list of station names, in the format NET.STA.
    :type comps: list of str
    :param comps: a list of component names, in Z,N,E,1,2.
    :type goal_day: str
    :param goal_day: the day of data to load, ISO 8601 format: e.g. 2016-12-31.
    :type params: class
    :param params: an object containing the config parameters, as obtained by
        :func:`msnoise.api.get_params`.
    :type responses: :class:`pandas.DataFrame`
    :param responses: a DataFrame containing the instrument responses, as
        obtained by :func:`msnoise.api.preload_instrument_responses`.
    :rtype: :class:`obspy.core.stream.Stream`
    :return: A Stream object containing all traces.
    """
    datafiles = {}
    output = Stream()
    MULTIPLEX = False
    MULTIPLEX_files = {}
    for station in stations:
        datafiles[station] = {}
        net, sta, loc = station.split('.')
        gd = datetime.datetime.strptime(goal_day, '%Y-%m-%d')
        files = get_data_availability(db, net=net, sta=sta, loc=loc,
                                      starttime=gd, endtime=gd)
        for comp in comps:
            datafiles[station][comp] = []
        for file in files:
            if file.sta != "MULTIPLEX":
                if file.chan[-1] not in comps:
                    continue
                fullpath = os.path.join(file.path, file.file)
                datafiles[station][file.chan[-1]].append(fullpath)
            else:
                MULTIPLEX = True
                print("Multiplex mode, reading the files")
                fullpath = os.path.join(file.path, file.file)
                multiplexed = sorted(glob.glob(fullpath))
                for comp in comps:
                    for fn in multiplexed:
                        if fn in MULTIPLEX_files:
                            _ = MULTIPLEX_files[fn]
                        else:
                            # print("Reading %s" % fn)
                            _ = read(fn, format=params.archive_format or None)
                            traces = []
                            for tr in _:
                                if "%s.%s" % (tr.stats.network,
                                              tr.stats.station) in stations \
                                        and tr.stats.channel[-1] in comps:
                                    traces.append(tr)
                            del _
                            _ = Stream(traces=traces)
                            MULTIPLEX_files[fn] = _
                        datafiles[station][comp].append(_)

    for istation, station in enumerate(stations):
        net, sta, loc = station.split(".")
        for comp in comps:
            files = datafiles[station][comp]  # direct lookup instead of eval()
            if len(files) != 0:
                logger.debug("%s.%s Reading %i Files" %
                             (station, comp, len(files)))
                traces = []
                for file in files:
                    if isinstance(file, Stream):
                        st = file.select(network=net, station=sta,
                                         component=comp).copy()
                    else:
                        try:
                            # print("Reading %s" % file)
                            # t = time.time()
                            # "dytpe" was a typo for "dtype"
                            st = read(file, dtype=np.float64,
                                      starttime=UTCDateTime(gd),
                                      endtime=UTCDateTime(gd) + 86400,
                                      station=sta,
                                      format=params.archive_format or None)
                            # print("done in", time.time() - t)
                        except Exception:
                            logger.debug("ERROR reading file %s" % file)
                            # TODO add traceback (optional?)
                            continue
                    for tr in st:
                        if len(tr.stats.channel) == 2:
                            tr.stats.channel += tr.stats.location
                            tr.stats.location = "00"
                    tmp = st.select(network=net, station=sta, component=comp)
                    if not len(tmp):
                        for tr in st:
                            tr.stats.network = net
                        st = st.select(network=net, station=sta,
                                       component=comp)
                    else:
                        st = tmp
                    for tr in st:
                        tr.data = tr.data.astype(np.float64)
                        tr.stats.network = tr.stats.network.upper()
                        tr.stats.station = tr.stats.station.upper()
                        tr.stats.channel = tr.stats.channel.upper()
                        if tr.stats.location == "":
                            tr.stats.location = "--"
                        traces.append(tr)
                    del st
                stream = Stream(traces=traces)
                if not len(stream):
                    continue
                # Round-trip through an in-memory MiniSEED file to normalize
                # the traces before further processing
                f = io.BytesIO()
                stream.write(f, format='MSEED')
                f.seek(0)
                stream = read(f, format="MSEED")
                stream.sort()
                # try:
                #     # HACK not super clean... should find a way to prevent
                #     # the same trace id with different sps to occur
                #     stream.merge(method=1, interpolation_samples=3,
                #                  fill_value=None)
                # except:
                #     logger.debug("Error while merging...")
                #     traceback.print_exc()
                #     continue
                # stream = stream.split()
                if not len(stream):
                    continue
                logger.debug("%s Checking sample alignment" % stream[0].id)
                for i, trace in enumerate(stream):
                    stream[i] = check_and_phase_shift(
                        trace, params.preprocess_taper_length)

                logger.debug("%s Checking Gaps" % stream[0].id)
                if len(getGaps(stream)) > 0:
                    max_gap = params.preprocess_max_gap * \
                        stream[0].stats.sampling_rate
                    gaps = getGaps(stream)
                    while len(gaps):
                        too_long = 0
                        for gap in gaps:
                            if int(gap[-1]) <= max_gap:
                                try:
                                    stream[gap[0]] = stream[gap[0]].__add__(
                                        stream[gap[1]], method=1,
                                        fill_value="interpolate")
                                    stream.remove(stream[gap[1]])
                                except Exception:
                                    stream.remove(stream[gap[1]])
                                break
                            else:
                                too_long += 1
                        if too_long == len(gaps):
                            break
                        gaps = getGaps(stream)
                    del gaps

                stream = stream.split()
                # iterate over copies: traces may be removed mid-loop
                for tr in list(stream):
                    if tr.stats.sampling_rate < (params.goal_sampling_rate - 1):
                        stream.remove(tr)
                taper_length = params.preprocess_taper_length  # seconds
                for trace in list(stream):
                    if trace.stats.npts < (4 * taper_length *
                                           trace.stats.sampling_rate):
                        stream.remove(trace)
                    else:
                        trace.detrend(type="demean")
                        trace.detrend(type="linear")
                        trace.taper(max_percentage=None,
                                    max_length=taper_length)

                if not len(stream):
                    logger.debug(" has only too small traces, skipping...")
                    continue

                for trace in stream:
                    logger.debug("%s Highpass at %.2f Hz" %
                                 (trace.id, params.preprocess_highpass))
                    trace.filter("highpass", freq=params.preprocess_highpass,
                                 zerophase=True, corners=4)

                    if trace.stats.sampling_rate != params.goal_sampling_rate:
                        logger.debug("%s Lowpass at %.2f Hz" %
                                     (trace.id, params.preprocess_lowpass))
                        trace.filter("lowpass",
                                     freq=params.preprocess_lowpass,
                                     zerophase=True, corners=8)

                        if params.resampling_method == "Resample":
                            logger.debug("%s Downsample to %.1f Hz" %
                                         (trace.id, params.goal_sampling_rate))
                            trace.data = resample(
                                trace.data,
                                params.goal_sampling_rate /
                                trace.stats.sampling_rate,
                                'sinc_fastest')
                        elif params.resampling_method == "Decimate":
                            decimation_factor = trace.stats.sampling_rate / \
                                params.goal_sampling_rate
                            if not int(decimation_factor) == decimation_factor:
                                logger.warning(
                                    "%s CANNOT be decimated by an integer "
                                    "factor, consider using Resample or "
                                    "Lanczos methods. Trace sampling rate = "
                                    "%i ; Desired CC sampling rate = %i" %
                                    (trace.id, trace.stats.sampling_rate,
                                     params.goal_sampling_rate))
                                sys.stdout.flush()
                                sys.exit()
                            logger.debug("%s Decimate by a factor of %i" %
                                         (trace.id, decimation_factor))
                            trace.data = trace.data[::int(decimation_factor)]
                        elif params.resampling_method == "Lanczos":
                            logger.debug("%s Downsample to %.1f Hz" %
                                         (trace.id, params.goal_sampling_rate))
                            trace.data = np.array(trace.data)
                            trace.interpolate(
                                method="lanczos",
                                sampling_rate=params.goal_sampling_rate,
                                a=1.0)
                        trace.stats.sampling_rate = params.goal_sampling_rate
                del trace

                if params.remove_response:
                    logger.debug('%s Removing instrument response' %
                                 stream[0].id)
                    try:
                        stream.attach_response(responses)
                        stream.remove_response(
                            pre_filt=params.response_prefilt, taper=False)
                    except Exception:
                        logger.error("Bad or no instrument response "
                                     "information for %s, skipping" %
                                     stream[0].id)
                        continue

                for tr in stream:
                    tr.data = tr.data.astype(np.float32)
                output += stream
                del stream
        del files
    clean_scipy_cache()
    del MULTIPLEX_files
    return output
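# A minimal sketch of the in-memory MiniSEED round-trip used above: writing
# the stream to a BytesIO buffer and reading it back yields a freshly parsed
# Stream with contiguous data and normalized record boundaries, at the cost
# of one serialization pass.
import io
import numpy as np
from obspy import Trace, Stream, read, UTCDateTime

stream = Stream([Trace(np.arange(1000, dtype=np.int32),
                       header={"starttime": UTCDateTime(0),
                               "sampling_rate": 100.0})])
buf = io.BytesIO()
stream.write(buf, format="MSEED")
buf.seek(0)
round_tripped = read(buf, format="MSEED")
print(round_tripped)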
def main():

    print()
    print("################################################################################")
    print("# __ _ _ #")
    print("# _ __ / _|_ __ _ _ | |__ __ _ _ __ _ __ ___ ___ _ __ (_) ___ ___ #")
    print("# | '__| |_| '_ \| | | | | '_ \ / _` | '__| '_ ` _ \ / _ \| '_ \| |/ __/ __| #")
    print("# | | | _| |_) | |_| | | | | | (_| | | | | | | | | (_) | | | | | (__\__ \ #")
    print("# |_| |_| | .__/ \__, |___|_| |_|\__,_|_| |_| |_| |_|\___/|_| |_|_|\___|___/ #")
    print("# |_| |___/_____| #")
    print("# #")
    print("################################################################################")
    print()

    # Run Input Parser
    args = get_harmonics_arguments()

    # Load Database
    db, stkeys = stdb.io.load_db(fname=args.indb, keys=args.stkeys)

    # Track processed folders
    procfold = []

    # Loop over station keys
    for stkey in list(stkeys):

        # Extract station information from dictionary
        sta = db[stkey]

        # Construct Folder Name
        stfld = stkey
        if not args.lkey:
            stfld = stkey.split('.')[0] + "." + stkey.split('.')[1]

        # Define path to see if it exists
        if args.phase in ['P', 'PP', 'allP']:
            datapath = Path('P_DATA') / stfld
        elif args.phase in ['S', 'SKS', 'allS']:
            datapath = Path('S_DATA') / stfld
        if not datapath.is_dir():
            print('Path to ' + str(datapath) + ' doesn`t exist - continuing')
            continue

        # Get search start time
        if args.startT is None:
            tstart = sta.startdate
        else:
            tstart = args.startT

        # Get search end time
        if args.endT is None:
            tend = sta.enddate
        else:
            tend = args.endT

        if tstart > sta.enddate or tend < sta.startdate:
            continue

        # Temporary print locations
        tlocs = sta.location
        if len(tlocs) == 0:
            tlocs = ['']
        for il in range(0, len(tlocs)):
            if len(tlocs[il]) == 0:
                tlocs[il] = "--"
        sta.location = tlocs

        # Update Display
        print(" ")
        print(" ")
        print("|===============================================|")
        print("|===============================================|")
        print("| {0:>8s} |".format(sta.station))
        print("|===============================================|")
        print("|===============================================|")
        print("| Station: {0:>2s}.{1:5s} |".format(sta.network, sta.station))
        print("| Channel: {0:2s}; Locations: {1:15s} |".format(
            sta.channel, ",".join(tlocs)))
        print("| Lon: {0:7.2f}; Lat: {1:6.2f} |".format(
            sta.longitude, sta.latitude))
        print("| Start time: {0:19s} |".format(
            sta.startdate.strftime("%Y-%m-%d %H:%M:%S")))
        print("| End time: {0:19s} |".format(
            sta.enddate.strftime("%Y-%m-%d %H:%M:%S")))
        print("|-----------------------------------------------|")

        # Check for folder already processed
        if stfld in procfold:
            print(' {0} already processed...skipping '.format(stfld))
            continue

        rfRstream = Stream()
        rfTstream = Stream()

        datafiles = [x for x in datapath.iterdir() if x.is_dir()]
        for folder in datafiles:

            # Skip hidden folders
            if folder.name.startswith('.'):
                continue

            date = folder.name.split('_')[0]
            year = date[0:4]
            month = date[4:6]
            day = date[6:8]
            dateUTC = UTCDateTime(year + '-' + month + '-' + day)

            if dateUTC > tstart and dateUTC < tend:
                filename = folder / "RF_Data.pkl"
                if filename.is_file():
                    file = open(filename, "rb")
                    rfdata = pickle.load(file)
                    # The bare "rfdata[0].stats.snr" in the original was
                    # almost certainly missing its "> args.snr" comparison.
                    if rfdata[0].stats.snrh > args.snrh and \
                            rfdata[0].stats.snr > args.snr and \
                            rfdata[0].stats.cc > args.cc:
                        rfRstream.append(rfdata[1])
                        rfTstream.append(rfdata[2])
                    file.close()
            else:
                continue

        if args.no_outl:
            # Remove outliers wrt variance
            varR = np.array([np.var(tr.data) for tr in rfRstream])

            # Calculate robust z-scores from the median absolute deviation
            medvarR = np.median(varR)
            madvarR = 1.4826 * np.median(np.abs(varR - medvarR))
            robustR = np.abs((varR - medvarR) / madvarR)
            outliersR = np.arange(len(rfRstream))[robustR > 2.]
            for i in outliersR[::-1]:
                rfRstream.remove(rfRstream[i])
                rfTstream.remove(rfTstream[i])

            # Do the same for transverse
            varT = np.array([np.var(tr.data) for tr in rfTstream])
            medvarT = np.median(varT)
            madvarT = 1.4826 * np.median(np.abs(varT - medvarT))
            robustT = np.abs((varT - medvarT) / madvarT)
            outliersT = np.arange(len(rfTstream))[robustT > 2.]
            for i in outliersT[::-1]:
                rfRstream.remove(rfRstream[i])
                rfTstream.remove(rfTstream[i])

        # Try binning if specified
        if args.nbin is not None:
            rf_tmp = binning.bin(rfRstream, rfTstream, typ='baz',
                                 nbin=args.nbin + 1)
            rfRstream = rf_tmp[0]
            rfTstream = rf_tmp[1]

        # Filter original streams
        rfRstream.filter('bandpass', freqmin=args.bp[0], freqmax=args.bp[1],
                         corners=2, zerophase=True)
        rfTstream.filter('bandpass', freqmin=args.bp[0], freqmax=args.bp[1],
                         corners=2, zerophase=True)

        # Initialize the Harmonics object
        harmonics = Harmonics(rfRstream, rfTstream)

        # Stack with or without dip
        if args.find_azim:
            harmonics.dcomp_find_azim(xmin=args.trange[0],
                                      xmax=args.trange[1])
            print("Optimal azimuth for trange between " +
                  str(args.trange[0]) + " and " + str(args.trange[1]) +
                  " seconds is: " + str(harmonics.azim))
        else:
            harmonics.dcomp_fix_azim(azim=args.azim)

        if args.save_plot and not Path('FIGURES').is_dir():
            Path('FIGURES').mkdir(parents=True)

        if args.plot:
            harmonics.plot(args.ymax, args.scale, args.save_plot,
                           args.title, args.form)

        if args.save:
            filename = datapath / (harmonics.hstream[0].stats.station +
                                   ".harmonics.pkl")
            harmonics.save(filename)

        # Update processed folders
        procfold.append(stfld)
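# The outlier rejection above uses a robust z-score: the median absolute
# deviation (MAD) scaled by 1.4826 approximates the standard deviation for
# normally distributed data, so |x - median| / (1.4826 * MAD) > 2 flags
# roughly the same points a 2-sigma rule would, while staying insensitive to
# the outliers themselves. A self-contained numpy sketch:
import numpy as np

def robust_outliers(values, threshold=2.0):
    """Return indices whose MAD-based robust z-score exceeds threshold."""
    values = np.asarray(values, dtype=float)
    med = np.median(values)
    mad = 1.4826 * np.median(np.abs(values - med))
    robust_z = np.abs((values - med) / mad)
    return np.arange(len(values))[robust_z > threshold]

print(robust_outliers([1.0, 1.1, 0.9, 1.0, 12.0]))  # -> [4]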
".." + channel + ".mseed") if os.path.isfile(file): st_temp += read(file) # load 24h continuous waveforms only if template exists for tt in st_temp: station = tt.stats.station channel = tt.stats.channel file1 = cont_dir + sday + "." + station + "." + channel # print(" file1 ===", file1) if os.path.isfile(file1): st_cont += read(file1) else: # remove from the template stream if continuous not exists st_temp.remove(tt) st_cont.filter("bandpass", freqmin=bandpass[0], freqmax=bandpass[1], zerophase=True) # define variables # st1_temp = st_temp.select(channel=channel[0]) tt = Trace() tc = Trace() count = 0 nmin = 0 npanels = len(st_temp) nfile = len(st_temp)
def main():

    print()
    print("#########################################")
    print("# __ _ _ #")
    print("# _ __ / _|_ __ _ _ | |__ | | __ #")
    print("# | '__| |_| '_ \| | | | | '_ \| |/ / #")
    print("# | | | _| |_) | |_| | | | | | < #")
    print("# |_| |_| | .__/ \__, |___|_| |_|_|\_\ #")
    print("# |_| |___/_____| #")
    print("# #")
    print("#########################################")
    print()

    # Run Input Parser
    args = arguments.get_hk_arguments()

    # Load Database
    db = stdb.io.load_db(fname=args.indb)

    # Construct station key loop (note: the original bare "sorted(allkeys)"
    # call discarded its result; sorted() returns a new list)
    allkeys = sorted(db.keys())

    # Extract key subset
    if len(args.stkeys) > 0:
        stkeys = []
        for skey in args.stkeys:
            stkeys.extend([s for s in allkeys if skey in s])
    else:
        stkeys = sorted(db.keys())

    # Loop over station keys
    for stkey in list(stkeys):

        # Extract station information from dictionary
        sta = db[stkey]

        # Define path to see if it exists
        if args.phase in ['P', 'PP', 'allP']:
            datapath = Path('P_DATA') / stkey
        elif args.phase in ['S', 'SKS', 'allS']:
            datapath = Path('S_DATA') / stkey
        if not datapath.is_dir():
            print('Path to ' + str(datapath) + ' doesn`t exist - continuing')
            continue

        # Define save path
        if args.save:
            savepath = Path('HK_DATA') / stkey
            if not savepath.is_dir():
                print('Path to ' + str(savepath) +
                      ' doesn`t exist - creating it')
                savepath.mkdir(parents=True)

        # Get search start time
        if args.startT is None:
            tstart = sta.startdate
        else:
            tstart = args.startT

        # Get search end time
        if args.endT is None:
            tend = sta.enddate
        else:
            tend = args.endT

        if tstart > sta.enddate or tend < sta.startdate:
            continue

        # Temporary print locations
        tlocs = sta.location
        if len(tlocs) == 0:
            tlocs = ['']
        for il in range(0, len(tlocs)):
            if len(tlocs[il]) == 0:
                tlocs[il] = "--"
        sta.location = tlocs

        # Update Display
        print(" ")
        print(" ")
        print("|===============================================|")
        print("|===============================================|")
        print("| {0:>8s} |".format(sta.station))
        print("|===============================================|")
        print("|===============================================|")
        print("| Station: {0:>2s}.{1:5s} |".format(sta.network, sta.station))
        print("| Channel: {0:2s}; Locations: {1:15s} |".format(
            sta.channel, ",".join(tlocs)))
        print("| Lon: {0:7.2f}; Lat: {1:6.2f} |".format(
            sta.longitude, sta.latitude))
        print("| Start time: {0:19s} |".format(
            sta.startdate.strftime("%Y-%m-%d %H:%M:%S")))
        print("| End time: {0:19s} |".format(
            sta.enddate.strftime("%Y-%m-%d %H:%M:%S")))
        print("|-----------------------------------------------|")

        rfRstream = Stream()

        datafiles = [x for x in datapath.iterdir() if x.is_dir()]
        for folder in datafiles:

            # Skip hidden folders
            if folder.name.startswith('.'):
                continue

            date = folder.name.split('_')[0]
            year = date[0:4]
            month = date[4:6]
            day = date[6:8]
            dateUTC = UTCDateTime(year + '-' + month + '-' + day)

            if dateUTC > tstart and dateUTC < tend:

                # Load meta data
                metafile = folder / "Meta_Data.pkl"
                if not metafile.is_file():
                    continue
                meta = pickle.load(open(metafile, 'rb'))

                # Skip data not in list of phases
                if meta.phase not in args.listphase:
                    continue

                # QC Thresholding
                if meta.snrh < args.snrh:
                    continue
                if meta.snr < args.snr:
                    continue
                if meta.cc < args.cc:
                    continue

                # # Check bounds on data
                # if meta.slow < args.slowbound[0] and meta.slow > args.slowbound[1]:
                #     continue
                # if meta.baz < args.bazbound[0] and meta.baz > args.bazbound[1]:
                #     continue

                # If everything passed, load the RF data
                filename = folder / "RF_Data.pkl"
                if filename.is_file():
                    file = open(filename, "rb")
                    rfdata = pickle.load(file)
                    rfRstream.append(rfdata[1])
                    file.close()
                    if rfdata[0].stats.npts != 1451:
                        print(folder)

        if len(rfRstream) == 0:
            continue

        if args.no_outl:
            t1 = 0.
            t2 = 30.

            varR = []
            for i in range(len(rfRstream)):
                taxis = rfRstream[i].stats.taxis
                tselect = (taxis > t1) & (taxis < t2)
                varR.append(np.var(rfRstream[i].data[tselect]))
            varR = np.array(varR)

            # Remove outliers wrt variance within time range
            medvarR = np.median(varR)
            madvarR = 1.4826 * np.median(np.abs(varR - medvarR))
            robustR = np.abs((varR - medvarR) / madvarR)
            outliersR = np.arange(len(rfRstream))[robustR > 2.5]
            for i in outliersR[::-1]:
                rfRstream.remove(rfRstream[i])

        print('')
        print("Number of radial RF data: " + str(len(rfRstream)))
        print('')

        # Try binning if specified
        if args.calc_dip:
            rf_tmp = binning.bin_baz_slow(rfRstream, nbaz=args.nbaz + 1,
                                          nslow=args.nslow + 1, pws=args.pws)
            rfRstream = rf_tmp[0]
        else:
            rf_tmp = binning.bin(rfRstream, typ='slow', nbin=args.nslow + 1,
                                 pws=args.pws)
            rfRstream = rf_tmp[0]

        # Get a copy of the radial component and filter
        if args.copy:
            rfRstream_copy = rfRstream.copy()
            rfRstream_copy.filter('bandpass', freqmin=args.bp_copy[0],
                                  freqmax=args.bp_copy[1], corners=2,
                                  zerophase=True)

        # Check bin counts (iterate over a copy: traces may be removed):
        for tr in list(rfRstream):
            if tr.stats.nbin < args.binlim:
                rfRstream.remove(tr)

        # Continue if stream is too short
        if len(rfRstream) < 5:
            continue

        if args.save_plot and not Path('HK_PLOTS').is_dir():
            Path('HK_PLOTS').mkdir(parents=True)

        print('')
        print("Number of radial RF bins: " + str(len(rfRstream)))
        print('')

        # Filter original stream
        rfRstream.filter('bandpass', freqmin=args.bp[0], freqmax=args.bp[1],
                         corners=2, zerophase=True)

        # Initialize the HkStack object; rfRstream_copy only exists when
        # args.copy was set, so fall back to the single-stream form otherwise
        try:
            hkstack = HkStack(rfRstream, rfV2=rfRstream_copy,
                              strike=args.strike, dip=args.dip, vp=args.vp)
        except Exception:
            hkstack = HkStack(rfRstream, strike=args.strike, dip=args.dip,
                              vp=args.vp)

        # Update attributes
        hkstack.hbound = args.hbound
        hkstack.kbound = args.kbound
        hkstack.dh = args.dh
        hkstack.dk = args.dk
        hkstack.weights = args.weights

        # Stack with or without dip
        if args.calc_dip:
            hkstack.stack_dip()
        else:
            hkstack.stack()

        # Average stacks
        hkstack.average(typ=args.typ)

        if args.plot:
            hkstack.plot(args.save_plot, args.title, args.form)

        if args.save:
            filename = savepath / (hkstack.rfV1[0].stats.station +
                                   ".hkstack." + args.typ + ".pkl")
            hkstack.save(file=filename)
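# The H-k stack grid-searches crustal thickness H and Vp/Vs ratio k by
# summing receiver-function amplitudes at the predicted arrival times of the
# Ps conversion and its multiples. The standard Ps delay-time expression
# (e.g. Zhu & Kanamori, 2000), as a small sketch with illustrative values:
import numpy as np

def t_ps(H, k, vp=6.0, p=0.06):
    """Ps - P delay (s) for thickness H (km), Vp/Vs k, ray parameter p (s/km)."""
    vs = vp / k
    return H * (np.sqrt(vs ** -2 - p ** 2) - np.sqrt(vp ** -2 - p ** 2))

print(t_ps(H=35.0, k=1.75))  # ~4.5 s for typical continental crust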
class chunk(object):
    def __init__(self, N, S, L, C, verbose=False):
        self.N = N
        self.S = S
        self.L = L
        self.C = C
        self._verbose = verbose
        self._S = Stream()

    def id(self):
        return "%s.%s.%s.%s" % (self.N, self.S, self.L, self.C)

    def holdings(self):
        print(self._S)
        return

    @staticmethod
    def _syncronize(others, this):
        others.append(this)

        # Trim every stream to the window common to all of them
        ss = reduce(max, map(lambda st: st[0].stats.starttime, others))
        ee = reduce(min, map(lambda st: st[0].stats.endtime, others))
        for st in others:
            st.trim(ss, ee, nearest_sample=True)

        # Combine masks so every stream is masked wherever any one has a gap
        mask = np.zeros(len(this[0]), dtype=bool)
        for st in others:
            if len(st) > 1:
                st.merge()
            if not hasattr(st[0].data, "mask"):
                continue
            mask += st[0].data.mask
        for st in others:
            st[0].data = np.ma.masked_where(mask, st[0].data)
        return

    def _clean(self, s, e=None):
        toremove = []
        for t in self._S:
            if t.stats.endtime < s:
                toremove.append(t)
        for t in toremove:
            self._S.remove(t)
        if e is not None:
            for tr in self._S:
                tr.trim(starttime=s, endtime=e)
        return

    def _update(self, s, e):
        # NotImplemented is a value, not an exception class; raising the
        # proper NotImplementedError was almost certainly intended here
        raise NotImplementedError("Please Implement Me!")

    def get(self, start, end, others, withgaps=False, incomplete=False,
            nosync=False):
        if isinstance(start, int):
            s, _ = sol_span_in_utc(start)
            _, e = sol_span_in_utc(end)
        else:
            s, e = start, end

        self._update(s, e)

        sliced_t = self._S.slice(s, e, keep_empty_traces=False,
                                 nearest_sample=True).copy()
        ntraces = len(sliced_t)
        sliced_t = sliced_t.merge(method=1, interpolation_samples=0,
                                  fill_value=None)

        if ntraces == 0:
            return False

        if not incomplete and (sliced_t[0].stats.starttime > s or
                               sliced_t[0].stats.endtime < e):
            log(f" W:> Trace is incomplete: {sliced_t[0].stats.starttime} >" +
                f" {s} || {sliced_t[0].stats.endtime} < {e}.",
                level=1, verbose=self._verbose)
            return False

        if not withgaps and ntraces > 1:
            if self._verbose:
                print(" W:> Trace has %d gap%s." %
                      (ntraces - 1, "" if ntraces == 2 else "s"))
            return False

        if not nosync:
            self._syncronize(others, sliced_t)
        else:
            others.append(sliced_t)
        return True
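# `chunk` is abstract: subclasses supply _update() to (re)populate the
# internal stream self._S for the requested window. A hedged sketch of a
# file-backed subclass; the one-file-per-channel-id miniSEED layout and the
# FileChunk name are hypothetical, and os/glob/read are assumed to be
# imported by the surrounding module.
class FileChunk(chunk):
    def __init__(self, N, S, L, C, folder, verbose=False):
        super().__init__(N, S, L, C, verbose=verbose)
        self._folder = folder

    def _update(self, s, e):
        # Drop traces that end before the window, then read anything
        # covering [s, e] from the (hypothetical) per-channel file
        self._clean(s)
        pattern = os.path.join(self._folder, "%s.mseed" % self.id())
        for fn in glob.glob(pattern):
            self._S += read(fn, starttime=s, endtime=e)
        self._S.merge(method=1, fill_value=None)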
def main():

    print()
    print("############################################")
    print("# __ #")
    print("# _ __ / _|_ __ _ _ ___ ___ _ __ #")
    print("# | '__| |_| '_ \| | | | / __/ __| '_ \ #")
    print("# | | | _| |_) | |_| | | (_| (__| |_) | #")
    print("# |_| |_| | .__/ \__, |___\___\___| .__/ #")
    print("# |_| |___/_____| |_| #")
    print("# #")
    print("############################################")
    print()

    # Run Input Parser
    args = arguments.get_ccp_arguments()

    # Load Database
    db = stdb.io.load_db(fname=args.indb)

    # Construct station key loop
    allkeys = db.keys()

    # Extract key subset
    if len(args.stkeys) > 0:
        stkeys = []
        for skey in args.stkeys:
            stkeys.extend([s for s in allkeys if skey in s])
    else:
        stkeys = db.keys()

    if args.load:

        # Check if CCPimage object exists and whether overwrite has been set
        load_file = Path('CCP_load.pkl')
        if load_file.is_file() and not args.ovr:
            ccpfile = open(load_file, "rb")
            ccpimage = pickle.load(ccpfile)
            ccpfile.close()
        else:
            print()
            print("|-----------------------------------------------|")
            print("| Loading data |")
            print("|-----------------------------------------------|")
            print("| Gridding: ")
            print("| start = {0:5.1f},{1:6.1f}".format(
                args.coord_start[0], args.coord_start[1]))
            print("| end = {0:5.1f},{1:6.1f}".format(
                args.coord_end[0], args.coord_end[1]))
            print("| dz = {0} (km)".format(str(args.dz)))
            print("| dx = {0} (km)".format(str(args.dx)))
            print()

            # Initialize CCPimage object
            ccpimage = CCPimage(coord_start=args.coord_start,
                                coord_end=args.coord_end,
                                dz=args.dz, dx=args.dx)

            # Loop over station keys
            for stkey in list(stkeys):

                # Extract station information from dictionary
                sta = db[stkey]

                # Define path to see if it exists
                if args.phase in ['P', 'PP', 'allP']:
                    datapath = Path('P_DATA') / stkey
                elif args.phase in ['S', 'SKS', 'allS']:
                    datapath = Path('S_DATA') / stkey
                if not datapath.is_dir():
                    print('Path to ' + str(datapath) +
                          ' doesn`t exist - continuing')
                    continue

                # Temporary print locations
                tlocs = sta.location
                if len(tlocs) == 0:
                    tlocs = ['']
                for il in range(0, len(tlocs)):
                    if len(tlocs[il]) == 0:
                        tlocs[il] = "--"
                sta.location = tlocs

                rfRstream = Stream()

                datafiles = [x for x in datapath.iterdir() if x.is_dir()]
                for folder in datafiles:

                    # Skip hidden folders
                    if folder.name.startswith('.'):
                        continue

                    # Load meta data
                    filename = folder / "Meta_Data.pkl"
                    if not filename.is_file():
                        continue
                    metafile = open(filename, 'rb')
                    meta = pickle.load(metafile)
                    metafile.close()

                    # Skip data not in list of phases
                    if meta.phase not in args.listphase:
                        continue

                    # QC Thresholding
                    if meta.snrh < args.snrh:
                        continue
                    if meta.snr < args.snr:
                        continue
                    if meta.cc < args.cc:
                        continue

                    # If everything passed, load the RF data
                    filename = folder / "RF_Data.pkl"
                    if filename.is_file():
                        file = open(filename, "rb")
                        rfdata = pickle.load(file)
                        rfRstream.append(rfdata[1])
                        file.close()

                if len(rfRstream) == 0:
                    continue

                if args.no_outl:
                    t1 = 0.
                    t2 = 30.

                    varR = []
                    for i in range(len(rfRstream)):
                        taxis = rfRstream[i].stats.taxis
                        tselect = (taxis > t1) & (taxis < t2)
                        varR.append(np.var(rfRstream[i].data[tselect]))
                    varR = np.array(varR)

                    # Remove outliers wrt variance within time range
                    medvarR = np.median(varR)
                    madvarR = 1.4826 * np.median(np.abs(varR - medvarR))
                    robustR = np.abs((varR - medvarR) / madvarR)
                    outliersR = np.arange(len(rfRstream))[robustR > 2.5]
                    for i in outliersR[::-1]:
                        rfRstream.remove(rfRstream[i])

                print("Station: {0:>2s}.{1:5s} - {2} traces loaded".format(
                    sta.network, sta.station, len(rfRstream)))
                if len(rfRstream) == 0:
                    continue

                ccpimage.add_rfstream(rfRstream)

            if len(ccpimage.radialRF) > 0:
                ccpimage.save("CCP_load.pkl")
                ccpimage.is_ready_for_prep = True
                print()
                print("CCPimage saved to 'CCP_load.pkl'")
            else:
                ccpimage.is_ready_for_prep = False
    else:
        pass

    if args.prep:

        prep_file = Path("CCP_prep.pkl")
        if prep_file.is_file() and not args.ovr:
            ccpfile = open(prep_file, 'rb')
            ccpimage = pickle.load(ccpfile)
            ccpfile.close()
        else:
            load_file = Path('CCP_load.pkl')
            if not load_file.is_file():
                raise (Exception("No CCP_load.pkl file available - aborting"))
            else:
                print()
                print("|-----------------------------------------------|")
                print("| Preparing data before stacking |")
                print("|-----------------------------------------------|")
                print("| Frequencies: ")
                print("| f1 = {0:4.2f} (Hz)".format(args.f1))
                print("| f2ps = {0:4.2f} (Hz)".format(args.f2ps))
                print("| f2pps = {0:4.2f} (Hz)".format(args.f2pps))
                print("| f2pss = {0:4.2f} (Hz)".format(args.f2pss))
                print("| Binning: ")
                print("| nbaz = {0}".format(str(args.nbaz)))
                print("| nslow = {0}".format(str(args.nslow)))
                print()

                ccpfile = open(load_file, "rb")
                ccpimage = pickle.load(ccpfile)
                ccpfile.close()
                ccpimage.prep_data(f1=args.f1, f2ps=args.f2ps,
                                   f2pps=args.f2pps, f2pss=args.f2pss,
                                   nbaz=args.nbaz, nslow=args.nslow)
                ccpimage.is_ready_for_prestack = True
                ccpimage.save(prep_file)
                print()
                print("CCPimage saved to {0}".format(str(prep_file)))
    else:
        pass

    if args.prestack:

        prestack_file = Path("CCP_prestack.pkl")
        if prestack_file.is_file() and not args.ovr:
            ccpfile = open(prestack_file, 'rb')
            ccpimage = pickle.load(ccpfile)
            ccpfile.close()
        else:
            prep_file = Path("CCP_prep.pkl")
            if not prep_file.is_file():
                raise (Exception("No CCP_prep.pkl file available - aborting"))
            else:
                print()
                print("|-----------------------------------------------|")
                print("| CCP pre-stacking each phase |")
                print("|-----------------------------------------------|")
                print()

                ccpfile = open(prep_file, 'rb')
                ccpimage = pickle.load(ccpfile)
                ccpfile.close()
                ccpimage.prestack()
                ccpimage.save(prestack_file)
                print()
                print("CCPimage saved to {0}".format(str(prestack_file)))
    else:
        pass

    if args.ccp:

        ccp_file = Path("CCP_stack.pkl")
        if ccp_file.is_file() and not args.ovr:
            ccpfile = open(ccp_file, 'rb')
            ccpimage = pickle.load(ccpfile)
            ccpfile.close()
        else:
            prestack_file = Path("CCP_prestack.pkl")
            if not prestack_file.is_file():
                raise (Exception(
                    "No CCP_prestack.pkl file available - aborting"))
            else:
                if args.linear:
                    print()
                    print("|-----------------------------------------------|")
                    print("| Linear CCP stack - all phases |")
                    print("|-----------------------------------------------|")
                    print()
                elif args.pws:
                    print()
                    print("|-----------------------------------------------|")
                    print("| Phase-weighted CCP stack - all phases |")
                    print("|-----------------------------------------------|")
                    print()

                ccpfile = open(prestack_file, 'rb')
                ccpimage = pickle.load(ccpfile)
                ccpfile.close()
                ccpimage.ccp()
                if args.linear:
                    if args.weights:
                        ccpimage.weights = args.weights
                    ccpimage.linear_stack(typ='ccp')
                elif args.pws:
                    if args.weights:
                        ccpimage.weights = args.weights
                    ccpimage.phase_weighted_stack(typ='ccp')
                ccpimage.save(ccp_file)
                print()
                print("CCPimage saved to {0}".format(str(ccp_file)))

        if args.ccp_figure:
            ccpimage.plot_ccp(save=args.save_figure, fmt=args.fmt,
                              vmin=-1. * args.cbound, vmax=args.cbound,
                              title=args.title)
    else:
        pass

    if args.gccp:

        gccp_file = Path("GCCP_stack.pkl")
        if gccp_file.is_file() and not args.ovr:
            ccpfile = open(gccp_file, 'rb')
            ccpimage = pickle.load(ccpfile)
            ccpfile.close()
        else:
            prestack_file = Path("CCP_prestack.pkl")
            if not prestack_file.is_file():
                raise (Exception(
                    "No CCP_prestack.pkl file available - aborting"))
            else:
                if args.linear:
                    print()
                    print("|-----------------------------------------------|")
                    print("| Linear GCCP stack - all phases |")
                    print("|-----------------------------------------------|")
                    print()
                elif args.pws:
                    print()
                    print("|-----------------------------------------------|")
                    print("| Phase-weighted GCCP stack - all phases |")
                    print("|-----------------------------------------------|")
                    print()

                ccpfile = open(prestack_file, 'rb')
                ccpimage = pickle.load(ccpfile)
                ccpfile.close()
                ccpimage.gccp(wlen=args.wlen)
                if args.linear:
                    if args.weights:
                        ccpimage.weights = args.weights
                    ccpimage.linear_stack(typ='gccp')
                elif args.pws:
                    if args.weights:
                        ccpimage.weights = args.weights
                    ccpimage.phase_weighted_stack(typ='gccp')
                ccpimage.save(gccp_file)
                print()
                print("CCPimage saved to {0}".format(str(gccp_file)))

        if args.ccp_figure:
            ccpimage.plot_gccp(save=args.save_figure, fmt=args.fmt,
                               vmin=-1. * args.cbound, vmax=args.cbound,
                               title=args.title)
    else:
        pass
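# The CCP/GCCP branches above select between a linear and a phase-weighted
# stack. A phase-weighted stack (Schimmel & Paulssen, 1997) scales the linear
# stack by the coherence of instantaneous phases across traces; a compact
# numpy/scipy sketch, independent of the CCPimage implementation:
import numpy as np
from scipy.signal import hilbert

def phase_weighted_stack(traces, nu=2.0):
    """traces: 2-D array (ntraces, npts). Returns the PWS trace."""
    analytic = hilbert(traces, axis=1)
    # unit phasors exp(i*phi); their mean has modulus 1 when all traces agree
    phase_coherence = np.abs(np.mean(analytic / np.abs(analytic), axis=0))
    return traces.mean(axis=0) * phase_coherence ** nu

rng = np.random.default_rng(0)
coherent = np.sin(np.linspace(0, 10 * np.pi, 500))
traces = coherent + 0.5 * rng.standard_normal((20, 500))
stacked = phase_weighted_stack(traces)  # noise is strongly suppressed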
def preprocess(db, stations, comps, goal_day, params, responses=None):
    datafiles = {}
    output = Stream()
    for station in stations:
        datafiles[station] = {}
        net, sta = station.split('.')
        gd = datetime.datetime.strptime(goal_day, '%Y-%m-%d')
        files = get_data_availability(db, net=net, sta=sta,
                                      starttime=gd, endtime=gd)
        for comp in comps:
            datafiles[station][comp] = []
        for file in files:
            if file.comp[-1] not in comps:
                continue
            fullpath = os.path.join(file.path, file.file)
            datafiles[station][file.comp[-1]].append(fullpath)

    for istation, station in enumerate(stations):
        net, sta = station.split(".")
        for comp in comps:
            files = datafiles[station][comp]  # direct lookup instead of eval()
            if len(files) != 0:
                logging.debug("%s.%s Reading %i Files" %
                              (station, comp, len(files)))
                stream = Stream()
                for file in sorted(files):
                    # "dytpe" was a typo for "dtype"; np.float is removed in
                    # recent numpy, use np.float64
                    st = read(file, dtype=np.float64,
                              starttime=UTCDateTime(gd),
                              endtime=UTCDateTime(gd) + 86400)
                    tmp = st.select(network=net, station=sta, component=comp)
                    if not len(tmp):
                        for tr in st:
                            tr.stats.network = net
                        st = st.select(network=net, station=sta,
                                       component=comp)
                    else:
                        st = tmp
                    for tr in st:
                        tr.data = tr.data.astype(np.float64)
                    stream += st
                    del st
                stream.sort()
                stream.merge(method=1, interpolation_samples=3,
                             fill_value=None)
                stream = stream.split()

                logging.debug("Checking sample alignment")
                for i, trace in enumerate(stream):
                    stream[i] = check_and_phase_shift(trace)

                logging.debug("Checking Gaps")
                if len(getGaps(stream)) > 0:
                    max_gap = 10
                    only_too_long = False
                    while getGaps(stream) and not only_too_long:
                        too_long = 0
                        gaps = getGaps(stream)
                        for gap in gaps:
                            if int(gap[-1]) <= max_gap:
                                stream[gap[0]] = stream[gap[0]].__add__(
                                    stream[gap[1]], method=1,
                                    fill_value="interpolate")
                                stream.remove(stream[gap[1]])
                                break
                            else:
                                too_long += 1
                        if too_long == len(gaps):
                            only_too_long = True

                stream = stream.split()
                taper_length = 20.0  # seconds
                # iterate over a copy: traces may be removed mid-loop
                for trace in list(stream):
                    if trace.stats.npts < 4 * taper_length * \
                            trace.stats.sampling_rate:
                        stream.remove(trace)
                    else:
                        trace.detrend(type="demean")
                        trace.detrend(type="linear")
                        trace.taper(max_percentage=None, max_length=1.0)

                if not len(stream):
                    logging.debug(" has only too small traces, skipping...")
                    continue

                for trace in stream:
                    logging.debug("%s.%s Highpass at %.2f Hz" %
                                  (station, comp, params.preprocess_highpass))
                    trace.filter("highpass", freq=params.preprocess_highpass,
                                 zerophase=True)

                    if trace.stats.sampling_rate != params.goal_sampling_rate:
                        logging.debug("%s.%s Lowpass at %.2f Hz" %
                                      (station, comp,
                                       params.preprocess_lowpass))
                        trace.filter("lowpass",
                                     freq=params.preprocess_lowpass,
                                     zerophase=True, corners=8)

                        if params.resampling_method == "Resample":
                            logging.debug("%s.%s Downsample to %.1f Hz" %
                                          (station, comp,
                                           params.goal_sampling_rate))
                            trace.data = resample(
                                trace.data,
                                params.goal_sampling_rate /
                                trace.stats.sampling_rate,
                                'sinc_fastest')
                        elif params.resampling_method == "Decimate":
                            decimation_factor = trace.stats.sampling_rate / \
                                params.goal_sampling_rate
                            if not int(decimation_factor) == decimation_factor:
                                logging.warning(
                                    "%s.%s CANNOT be decimated by an integer "
                                    "factor, consider using Resample or "
                                    "Lanczos methods. Trace sampling rate = "
                                    "%i ; Desired CC sampling rate = %i" %
                                    (station, comp,
                                     trace.stats.sampling_rate,
                                     params.goal_sampling_rate))
                                sys.stdout.flush()
                                sys.exit()
                            logging.debug("%s.%s Decimate by a factor of %i" %
                                          (station, comp, decimation_factor))
                            trace.data = trace.data[::int(decimation_factor)]
                        elif params.resampling_method == "Lanczos":
                            logging.debug("%s.%s Downsample to %.1f Hz" %
                                          (station, comp,
                                           params.goal_sampling_rate))
                            trace.data = np.array(trace.data)
                            trace.interpolate(
                                method="lanczos",
                                sampling_rate=params.goal_sampling_rate,
                                a=1.0)
                        trace.stats.sampling_rate = params.goal_sampling_rate

                if get_config(db, 'remove_response', isbool=True):
                    logging.debug('%s Removing instrument response' %
                                  stream[0].id)
                    response_prefilt = eval(get_config(db, 'response_prefilt'))
                    response = responses[responses["channel_id"] ==
                                         stream[0].id]
                    if len(response) > 1:
                        response = response[response["start_date"] <
                                            UTCDateTime(gd)]
                        response = response[response["end_date"] >
                                            UTCDateTime(gd)]
                    elif len(response) == 0:
                        logging.info("No instrument response information "
                                     "for %s, exiting" % stream[0].id)
                        sys.exit()
                    datalesspz = response["paz"].values[0]
                    stream.simulate(paz_remove=datalesspz,
                                    remove_sensitivity=True,
                                    pre_filt=response_prefilt,
                                    paz_simulate=None)

                for tr in stream:
                    tr.data = tr.data.astype(np.float32)
                output += stream
                del stream
        del files
    clean_scipy_cache()
    return 0, output
def get_waveforms(
        eqinfo: Event,
        META,
        wp_tw_factor=15,
        t_beforeP=1500.,  # seconds
        t_afterWP=60.,  # seconds
        client=None,
        dist_range=(5., 90.),  # degrees
        add_ptime=True,
        bulk_chunk_len=200,
        prune_cutoffs=(1., 2., 3., 4., 5., 5.),
        decimate=True,
        reject_incomplete=False,
        req_times=None,
        waveforms=None,
        save_path=None):
    r'''
    This function will get the waveforms associated with an event to perform
    a WP inversion.

    :param float wp_tw_factor: Defines the Wphase time window. This is used
        as :math:`[t_p, t_p + wp\_tw\_factor \Delta]`.
    :param float dist_range: A pair of floats specifying the min/max
        epicentral distance (in degrees) to be considered; i.e. stations
        outside of this range will be excluded.
    :param bool add_ptime: Should the p arrival time (with respect to the
        origin time) be included in each station's metadata. Including this
        should result in a significant speedup.
    :param int bulk_chunk_len: Maximum number of entries (channels) to be
        included in a single request to a metadata service.
    :param prune_cutoffs: List of pruning (distance) cutoffs passed to
        :py:func:`station_pruningNEZ`
    :type prune_cutoffs: list of floats
    :param bool decimate: Perform decimation of BH channels. **This should
        always be true when running Wphase**.
    :param dict req_times: Dictionary keyed by channel id containing the
        start and end times of the time window required by Wphase.
    :param client: An Obspy FDSN client.

    :return: If *add_ptime* is *True*, then return a two element tuple
        containing:

        #. An :py:class:`obspy.core.stream.Stream` containing the data.
        #. A new metadata dictionary containing the p arrival time for each
           trace.

        If *add_ptime* is *False* return the stream only.
    '''
    # Obtaining stations within the distance range only. trlist_in_dist_range
    # will contain the ids of channels which are within the specified
    # distance range.
    trlist_in_dist_range = []
    tr_dists_in_range = []
    for trid, stmeta in META.items():
        stlat, stlon = stmeta["latitude"], stmeta["longitude"]
        dist = locations2degrees(eqinfo.latitude, eqinfo.longitude,
                                 stlat, stlon)
        if dist >= dist_range[0] and dist <= dist_range[1]:
            trlist_in_dist_range.append(trid)
            tr_dists_in_range.append(dist)

    # Determining times for the waveform request. req_times will contain the
    # time window for each channel:
    # {
    #     <channel-id>: (<obspy.core.utcdatetime.UTCDateTime>,
    #                    <obspy.core.utcdatetime.UTCDateTime>)
    # }
    #
    # new_META:
    # {
    #     <channel-id>: original metadata for channel and extra key 'ptime'
    # }
    if req_times is None:
        req_times = {}
    if add_ptime:
        new_META = {}
    for i_trid, trid in enumerate(trlist_in_dist_range):
        stmeta = META[trid]
        dist = tr_dists_in_range[i_trid]
        t_p = getPtime(dist, eqinfo.depth)
        t_p_UTC = eqinfo.time + t_p
        t_wp_end = wp_tw_factor * dist
        t_wp_end_UTC = t_p_UTC + t_wp_end
        t1 = t_p_UTC - t_beforeP
        t2 = t_wp_end_UTC + t_afterWP
        if add_ptime:
            new_META[trid] = stmeta
            new_META[trid]['ptime'] = t_p
        req_times[trid] = [t1, t2]

    # ---------------get waveforms--------------

    # Station pruning (note: this references new_META, so it effectively
    # assumes add_ptime=True whenever prune_cutoffs is given)
    if prune_cutoffs is not None:
        trlist_in_dist_range = station_pruningNEZ(trlist_in_dist_range,
                                                  new_META, prune_cutoffs)

    # reject incomplete traces:
    if reject_incomplete:
        now = UTCDateTime()
        trlist_in_dist_range = [trid for trid in trlist_in_dist_range
                                if req_times[trid][1] < now]

    if waveforms:
        # waveforms provided as input, just clean them
        if isinstance(waveforms, str):
            waveforms = obspy.read(waveforms)
        logger.info('%d traces provided as input', len(waveforms))
        st = waveforms
    else:
        # fetch waveforms from server
        logger.info('fetching data from %s', client.base_url)
        st = Stream()

        # Create the subsets for each request
        bulk = []
        for trid in trlist_in_dist_range:
            net, sta, loc, cha = trid.split('.')
            t1, t2 = req_times[trid]
            bulk.append([net, sta, loc, cha, t1, t2])
        bulk_chunks = [bulk[i_chunk:i_chunk + bulk_chunk_len]
                       for i_chunk in range(0, len(bulk), bulk_chunk_len)]

        # make a call for each subset
        # TODO: One might want to do this in parallel.
        for chunk in bulk_chunks:
            # TODO: Do want to try/catch here?
            try:
                st += client.get_waveforms_bulk(chunk)
            except Exception as e:
                logger.error('Problem with request from server %s:\n%s',
                             client.base_url, str(e))
                continue

        if save_path:
            logger.info("Saving waveforms in %s", save_path)
            st.write(save_path, format='MSEED')

    # Removing gappy traces (that is, channel ids that are repeated)
    st = remove_gappy_traces(st)
    logger.info('%s traces remaining after throwing out gappy ones', len(st))

    # Decimating BH channels. This can be done in parallel.
    if decimate:
        st_B_cha_list = [tr for tr in st if tr.id.split('.')[-1][0] == 'B']
        for tr in st_B_cha_list:
            try:
                decimateTo1Hz(tr)
            except CannotDecimate as e:
                logger.info("Removing trace %s - %s", tr.id, e)
                st.remove(tr)

    # Creating contiguous arrays for the traces. This may speed up things
    # later.
    for tr in st:
        tr.data = np.ascontiguousarray(tr.data)

    if add_ptime:
        return st, new_META
    else:
        return st
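# The request window above is [t_p - t_beforeP, t_p + wp_tw_factor * dist +
# t_afterWP], where t_p is the predicted P arrival and dist the epicentral
# distance in degrees. A worked sketch with obspy.taup standing in for the
# module's getPtime helper; the event/station values are hypothetical.
from obspy import UTCDateTime
from obspy.taup import TauPyModel
from obspy.geodetics import locations2degrees

origin_time = UTCDateTime("2021-03-04T17:41:23")
dist = locations2degrees(-17.8, -178.1, 35.7, 139.7)  # event -> station, deg
model = TauPyModel("iasp91")
t_p = model.get_travel_times(source_depth_in_km=30.0,
                             distance_in_degree=dist,
                             phase_list=["P"])[0].time
t1 = origin_time + t_p - 1500.0            # t_beforeP
t2 = origin_time + t_p + 15 * dist + 60.0  # wp_tw_factor * dist + t_afterWP
print(t1, t2)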
def GetData(
        eqinfo,
        META,
        wp_tw_factor=15,
        t_beforeP=1500.,  # seconds
        t_afterWP=60.,  # seconds
        server="http://rhe-eqm-seiscomp-dev.dev.lan:8081/",
        dist_range=(5., 90.),  # degrees
        add_ptime=True,
        bulk_chunk_len=200,
        prune_cutoffs=(1., 2., 3., 4., 5., 5.),
        decimate=True,
        reject_incomplete=False,
        req_times=None):
    r'''
    This function will get the waveforms associated with an event to perform
    a WP inversion.

    :param float wp_tw_factor: Defines the Wphase time window. This is used
        as :math:`[t_p, t_p + wp\_tw\_factor \Delta]`.
    :param float dist_range: A pair of floats specifying the min/max
        epicentral distance (in degrees) to be considered; i.e. stations
        outside of this range will be excluded.
    :param bool add_ptime: Should the p arrival time (with respect to the
        origin time) be included in each station's metadata. Including this
        should result in a significant speedup.
    :param int bulk_chunk_len: Maximum number of entries (channels) to be
        included in a single request to a metadata service.
    :param prune_cutoffs: List of pruning (distance) cutoffs passed to
        :py:func:`station_pruningNEZ`
    :type prune_cutoffs: list of floats
    :param bool decimate: Perform decimation of BH channels. **This should
        always be true when running Wphase**.
    :param dict req_times: Dictionary keyed by channel id containing the
        start and end times of the time window required by Wphase.

    :return: If *add_ptime* is *True*, then return a two element tuple
        containing:

        #. An :py:class:`obspy.core.stream.Stream` containing the data.
        #. A new metadata dictionary containing the p arrival time for each
           trace.

        If *add_ptime* is *False* return the stream only.
    '''
    # NOTE: the original snippet was Python 2 (print statements, iteritems,
    # xrange); it is ported to Python 3 here without other changes.

    # Sampling rates we will decimate. Channels with other sampling rates
    # will be ignored.
    decimable_BH_ch = [20., 40., 50.]

    client = Client(server)

    hyplat = eqinfo['lat']
    hyplon = eqinfo['lon']
    hypdep = eqinfo['dep']
    otime = eqinfo['time']

    # Obtaining stations within the distance range only. trlist_in_dist_range
    # will contain the ids of channels which are within the specified
    # distance range.
    trlist_in_dist_range = []
    tr_dists_in_range = []
    for trid, stmeta in META.items():
        stlat, stlon = stmeta['latitude'], stmeta['longitude']
        dist = locations2degrees(hyplat, hyplon, stlat, stlon)
        if dist >= dist_range[0] and dist <= dist_range[1]:
            trlist_in_dist_range.append(trid)
            tr_dists_in_range.append(dist)

    # Determining times for the waveform request. req_times will contain the
    # time window for each channel:
    # {
    #     <channel-id>: (<obspy.core.utcdatetime.UTCDateTime>,
    #                    <obspy.core.utcdatetime.UTCDateTime>)
    # }
    #
    # new_META:
    # {
    #     <channel-id>: original metadata for channel and extra key 'ptime'
    # }
    if req_times is None:
        req_times = {}
    if add_ptime:
        new_META = {}
    for i_trid, trid in enumerate(trlist_in_dist_range):
        stmeta = META[trid]
        dist = tr_dists_in_range[i_trid]
        t_p = getPtime(dist, hypdep)
        t_p_UTC = otime + t_p
        t_wp_end = wp_tw_factor * dist
        t_wp_end_UTC = t_p_UTC + t_wp_end
        t1 = t_p_UTC - t_beforeP
        t2 = t_wp_end_UTC + t_afterWP
        if add_ptime:
            new_META[trid] = stmeta
            new_META[trid]['ptime'] = t_p
        req_times[trid] = [t1, t2]

    # ---------------get waveforms--------------

    # Station pruning
    if prune_cutoffs is not None:
        trlist_in_dist_range = station_pruningNEZ(trlist_in_dist_range,
                                                  new_META, prune_cutoffs)

    # reject incomplete traces:
    if reject_incomplete:
        now = UTCDateTime()
        trlist_in_dist_range = [trid for trid in trlist_in_dist_range
                                if req_times[trid][1] < now]

    st = Stream()

    # Create the subsets for each request
    bulk = []
    for trid in trlist_in_dist_range:
        net, sta, loc, cha = trid.split('.')
        t1, t2 = req_times[trid]
        bulk.append([net, sta, loc, cha, t1, t2])
    bulk_chunks = [bulk[i_chunk:i_chunk + bulk_chunk_len]
                   for i_chunk in range(0, len(bulk), bulk_chunk_len)]

    # make a call for each subset
    # TODO: One might want to do this in parallel.
    for chunk in bulk_chunks:
        # TODO: Do want to try/catch here?
        try:
            st += client.get_waveforms_bulk(chunk)
        except Exception as e:
            print('Problem with request from server: {}'.format(server))
            print(e)
            continue

    # Removing gappy traces (that is, channel ids that are repeated)
    trlist_data = [tr.id for tr in st]
    rep_ids = [trid for trid, nrep in Counter(trlist_data).items()
               if nrep > 1]
    st = Stream(tr for tr in st if tr.id not in rep_ids)

    # Decimating BH channels. This can be done in parallel.
    if decimate:
        st_B_cha_list = [tr for tr in st if tr.id.split('.')[-1][0] == 'B']
        for tr in st_B_cha_list:
            samp_rate = tr.stats.sampling_rate
            if samp_rate not in decimable_BH_ch:
                st.remove(tr)
                continue
            # dec*to1 presumably decimate in place; rebinding tr alone would
            # not update the stream otherwise
            if samp_rate == 20.:
                tr = dec20to1(tr, fast=True)
            elif samp_rate == 40:
                tr = dec40to1(tr, fast=True)
            elif samp_rate == 50:
                tr = dec50to1(tr, fast=True)

    # Creating contiguous arrays for the traces. This may speed up things
    # later.
    for tr in st:
        tr.data = np.ascontiguousarray(tr.data)

    if add_ptime:
        return st, new_META
    else:
        return st
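# dec20to1/dec40to1/dec50to1 above implement fixed decimation cascades down
# to 1 Hz. A hedged equivalent using ObsPy's built-in Trace.decimate (which
# applies an anti-alias lowpass per stage); the staging below is an
# assumption for illustration, not the module's actual filter chain.
def decimate_to_1hz(tr):
    """Cascade-decimate a trace to 1 Hz; supports 20, 40 and 50 sps."""
    stages = {20.0: (5, 4), 40.0: (5, 4, 2), 50.0: (5, 5, 2)}
    try:
        factors = stages[tr.stats.sampling_rate]
    except KeyError:
        raise ValueError("unsupported sampling rate %s" %
                         tr.stats.sampling_rate)
    for factor in factors:
        tr.decimate(factor)  # lowpasses, then keeps every factor-th sample
    return tr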
snpts = 10

## Read in seismic data ##
st = Stream()
for day in range(stime.julday, etime.julday):
    for staloc in stalocs:
        try:
            sta, loc = staloc.split('_')
            # Use the loop variable here; the original re-read stime.julday
            # on every iteration, so only the first day was ever loaded.
            st += read('/msd/' + net + '_' + sta + '/' + str(stime.year) +
                       '/' + str(day) + '/' + loc + '_LH*')
        except Exception:
            # No data for this station on this day.
            pass

# Remove the vertical components.
for tr in st.select(channel="LHZ"):
    st.remove(tr)
st.merge(fill_value=0)
st.sort()
if debug:
    print(st)

## Creating Figure ##
fig = plt.figure(1, figsize=(19, 10))
plt.subplots_adjust(hspace=0.0)

## Setting font parameters ##
mpl.rc('font', family='serif')
mpl.rc('font', serif='Times')
mpl.rc('text', usetex=True)
mpl.rc('font', size=20)
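# The julian-day loop above excludes etime.julday itself and breaks when
# the time window crosses a year boundary. A sketch of a more robust
# variant, reusing the same net/stalocs/stime/etime names (assumed to be
# defined as above, with stime/etime as UTCDateTime objects):
from obspy import Stream, UTCDateTime, read

st = Stream()
day = UTCDateTime(stime.year, stime.month, stime.day)
while day <= etime:
    for staloc in stalocs:
        sta, loc = staloc.split('_')
        pattern = ('/msd/%s_%s/%d/%d/%s_LH*'
                   % (net, sta, day.year, day.julday, loc))
        try:
            st += read(pattern)
        except Exception:
            pass  # no data for this station/day
    day += 86400  # advance one day, in seconds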
def preprocess(db, stations, comps, goal_day, params, responses=None):
    """
    Fetches data for each ``stations`` and each ``comps`` using the
    data_availability table in the database.

    To correct for instrument responses, make sure to set
    ``remove_response`` to "Y" in the config and to provide the
    ``responses`` DataFrame.

    :Example:

    >>> from msnoise.api import connect, get_params, preload_instrument_responses
    >>> from msnoise.preprocessing import preprocess
    >>> db = connect()
    >>> params = get_params(db)
    >>> responses = preload_instrument_responses(db)
    >>> st = preprocess(db, ["YA.UV06","YA.UV10"], ["Z",], "2010-09-01", params, responses)
    >>> st
     2 Trace(s) in Stream:
    YA.UV06.00.HHZ | 2010-09-01T00:00:00.000000Z - 2010-09-01T23:59:59.950000Z | 20.0 Hz, 1728000 samples
    YA.UV10.00.HHZ | 2010-09-01T00:00:00.000000Z - 2010-09-01T23:59:59.950000Z | 20.0 Hz, 1728000 samples

    :type db: :class:`sqlalchemy.orm.session.Session`
    :param db: A :class:`~sqlalchemy.orm.session.Session` object, as
        obtained by :func:`msnoise.api.connect`.
    :type stations: list of str
    :param stations: a list of station names, in the format NET.STA.
    :type comps: list of str
    :param comps: a list of component names, in Z,N,E,1,2.
    :type goal_day: str
    :param goal_day: the day of data to load, ISO 8601 format:
        e.g. 2016-12-31.
    :type params: class
    :param params: an object containing the config parameters, as obtained
        by :func:`msnoise.api.get_params`.
    :type responses: :class:`pandas.DataFrame`
    :param responses: a DataFrame containing the instrument responses, as
        obtained by :func:`msnoise.api.preload_instrument_responses`.
    :rtype: :class:`obspy.core.stream.Stream`
    :return: A Stream object containing all traces.
    """
    datafiles = {}
    output = Stream()
    for station in stations:
        datafiles[station] = {}
        net, sta = station.split('.')
        gd = datetime.datetime.strptime(goal_day, '%Y-%m-%d')
        files = get_data_availability(
            db, net=net, sta=sta, starttime=gd, endtime=gd)
        for comp in comps:
            datafiles[station][comp] = []
        for file in files:
            if file.comp[-1] not in comps:
                continue
            fullpath = os.path.join(file.path, file.file)
            datafiles[station][file.comp[-1]].append(fullpath)

    for istation, station in enumerate(stations):
        net, sta = station.split(".")
        for comp in comps:
            # Plain dictionary lookup; no need for eval() here.
            files = datafiles[station][comp]
            if len(files) != 0:
                logger.debug("%s.%s Reading %i Files" %
                             (station, comp, len(files)))
                stream = Stream()
                for file in sorted(files):
                    try:
                        # The original passed a misspelled dytpe kwarg; the
                        # data are cast to float64 below anyway.
                        st = read(file, starttime=UTCDateTime(gd),
                                  endtime=UTCDateTime(gd) + 86400)
                    except Exception:
                        logger.debug("ERROR reading file %s" % file)
                        continue
                    for tr in st:
                        if len(tr.stats.channel) == 2:
                            tr.stats.channel += tr.stats.location
                            tr.stats.location = "00"
                    tmp = st.select(network=net, station=sta, component=comp)
                    if not len(tmp):
                        for tr in st:
                            tr.stats.network = net
                        st = st.select(network=net, station=sta,
                                       component=comp)
                    else:
                        st = tmp
                    for tr in st:
                        tr.data = tr.data.astype(np.float64)
                        tr.stats.network = tr.stats.network.upper()
                        tr.stats.station = tr.stats.station.upper()
                        tr.stats.channel = tr.stats.channel.upper()
                    stream += st
                    del st
                stream.sort()

                try:
                    # HACK not super clean...
                    # should find a way to prevent the same trace id with
                    # different sps to occur.
                    stream.merge(method=1, interpolation_samples=3,
                                 fill_value=None)
                except Exception:
                    logger.debug("Error while merging...")
                    traceback.print_exc()
                    continue
                stream = stream.split()
                if not len(stream):
                    continue
                logger.debug("%s Checking sample alignment" % stream[0].id)
                for i, trace in enumerate(stream):
                    stream[i] = check_and_phase_shift(trace)

                logger.debug("%s Checking Gaps" % stream[0].id)
                if len(getGaps(stream)) > 0:
                    max_gap = (params.preprocess_max_gap *
                               stream[0].stats.sampling_rate)
                    gaps = getGaps(stream)
                    while len(gaps):
                        too_long = 0
                        for gap in gaps:
                            if int(gap[-1]) <= max_gap:
                                try:
                                    stream[gap[0]] = stream[gap[0]].__add__(
                                        stream[gap[1]], method=1,
                                        fill_value="interpolate")
                                    stream.remove(stream[gap[1]])
                                except Exception:
                                    stream.remove(stream[gap[1]])
                                break
                            else:
                                too_long += 1
                        if too_long == len(gaps):
                            break
                        gaps = getGaps(stream)
                    del gaps

                stream = stream.split()
                for tr in stream:
                    if tr.stats.sampling_rate < (params.goal_sampling_rate - 1):
                        stream.remove(tr)
                taper_length = 20.0  # seconds
                for trace in stream:
                    if trace.stats.npts < 4 * taper_length * trace.stats.sampling_rate:
                        stream.remove(trace)
                    else:
                        trace.detrend(type="demean")
                        trace.detrend(type="linear")
                        trace.taper(max_percentage=None, max_length=1.0)

                if not len(stream):
                    logger.debug("%s.%s has only traces that are too short,"
                                 " skipping..." % (station, comp))
                    continue

                for trace in stream:
                    logger.debug("%s Highpass at %.2f Hz" %
                                 (trace.id, params.preprocess_highpass))
                    trace.filter("highpass", freq=params.preprocess_highpass,
                                 zerophase=True, corners=4)

                    if trace.stats.sampling_rate != params.goal_sampling_rate:
                        logger.debug("%s Lowpass at %.2f Hz" %
                                     (trace.id, params.preprocess_lowpass))
                        trace.filter("lowpass",
                                     freq=params.preprocess_lowpass,
                                     zerophase=True, corners=8)

                        if params.resampling_method == "Resample":
                            logger.debug("%s Downsample to %.1f Hz" %
                                         (trace.id,
                                          params.goal_sampling_rate))
                            trace.data = resample(
                                trace.data,
                                params.goal_sampling_rate /
                                trace.stats.sampling_rate,
                                'sinc_fastest')

                        elif params.resampling_method == "Decimate":
                            decimation_factor = (trace.stats.sampling_rate /
                                                 params.goal_sampling_rate)
                            if not int(decimation_factor) == decimation_factor:
                                logger.warning(
                                    "%s CANNOT be decimated by an integer "
                                    "factor, consider using the Resample or "
                                    "Lanczos methods. Trace sampling rate = "
                                    "%i ; Desired CC sampling rate = %i" %
                                    (trace.id, trace.stats.sampling_rate,
                                     params.goal_sampling_rate))
                                sys.stdout.flush()
                                sys.exit()
                            logger.debug("%s Decimate by a factor of %i" %
                                         (trace.id, decimation_factor))
                            trace.data = trace.data[::int(decimation_factor)]

                        elif params.resampling_method == "Lanczos":
                            logger.debug("%s Downsample to %.1f Hz" %
                                         (trace.id,
                                          params.goal_sampling_rate))
                            trace.data = np.array(trace.data)
                            trace.interpolate(
                                method="lanczos",
                                sampling_rate=params.goal_sampling_rate,
                                a=1.0)

                        trace.stats.sampling_rate = params.goal_sampling_rate
                    del trace

                if params.remove_response:
                    logger.debug('%s Removing instrument response' %
                                 stream[0].id)
                    response = responses[
                        responses["channel_id"] == stream[0].id]
                    if len(response) > 1:
                        response = response[
                            response["start_date"] <= UTCDateTime(gd)]
                    if len(response) > 1:
                        response = response[
                            response["end_date"] >= UTCDateTime(gd)]
                    elif len(response) == 0:
                        logger.info("No instrument response information "
                                    "for %s, skipping" % stream[0].id)
                        continue
                    try:
                        datalesspz = response["paz"].values[0]
                    except Exception:
                        logger.error("Bad instrument response information "
                                     "for %s, skipping" % stream[0].id)
                        continue
                    stream.simulate(
                        paz_remove=datalesspz,
                        remove_sensitivity=True,
                        pre_filt=params.response_prefilt,
                        paz_simulate=None,
                    )

                for tr in stream:
                    tr.data = tr.data.astype(np.float32)
                output += stream
                del stream
            del files
    clean_scipy_cache()
    return output
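# getGaps is used in the gap-merging loop above but is not defined in this
# excerpt. Below is a minimal sketch of a compatible helper, assuming only
# what the loop itself relies on: gap[0] and gap[1] are the indices of the
# two adjacent traces and gap[-1] is the gap length in samples. The real
# msnoise helper may carry more fields or differ in detail.
def getGaps_sketch(stream):
    """Return (i, i+1, trace_id, nsamples) tuples for gaps between
    consecutive traces that share an id."""
    gaps = []
    stream.sort()
    for i in range(len(stream) - 1):
        prev, nxt = stream[i], stream[i + 1]
        if prev.id != nxt.id:
            continue
        # Gap duration in seconds, converted to samples.
        delta = nxt.stats.starttime - prev.stats.endtime
        nsamples = delta * prev.stats.sampling_rate
        if nsamples > 0:
            gaps.append((i, i + 1, prev.id, nsamples))
    return gaps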
def main():

    print()
    print("################################################################################")
    print("# __ _ _ #")
    print("# _ __ / _|_ __ _ _ | |__ __ _ _ __ _ __ ___ ___ _ __ (_) ___ ___ #")
    print("# | '__| |_| '_ \| | | | | '_ \ / _` | '__| '_ ` _ \ / _ \| '_ \| |/ __/ __| #")
    print("# | | | _| |_) | |_| | | | | | (_| | | | | | | | | (_) | | | | | (__\__ \ #")
    print("# |_| |_| | .__/ \__, |___|_| |_|\__,_|_| |_| |_| |_|\___/|_| |_|_|\___|___/ #")
    print("# |_| |___/_____| #")
    print("# #")
    print("################################################################################")
    print()

    # Run the Input Parser
    (opts, indb) = options.get_harmonics_options()

    # Load the Database
    db = stdb.io.load_db(fname=indb)

    # Construct station key loop (sorted() returns a new list; the original
    # discarded its result)
    allkeys = sorted(db.keys())

    # Extract key subset
    if len(opts.stkeys) > 0:
        stkeys = []
        for skey in opts.stkeys:
            stkeys.extend([s for s in allkeys if skey in s])
    else:
        stkeys = sorted(db.keys())

    # Loop over station keys
    for stkey in list(stkeys):

        # Extract station information from dictionary
        sta = db[stkey]

        # Define path and check that it exists
        datapath = 'DATA/' + stkey
        if not os.path.isdir(datapath):
            raise Exception("Path to " + datapath +
                            " doesn't exist - aborting")

        # Get search start time
        if opts.startT is None:
            tstart = sta.startdate
        else:
            tstart = opts.startT

        # Get search end time
        if opts.endT is None:
            tend = sta.enddate
        else:
            tend = opts.endT

        if tstart > sta.enddate or tend < sta.startdate:
            continue

        # Make location codes printable ('--' for blank)
        tlocs = sta.location
        if len(tlocs) == 0:
            tlocs = ['']
        for il in range(len(tlocs)):
            if len(tlocs[il]) == 0:
                tlocs[il] = "--"
        sta.location = tlocs

        # Update Display
        print(" ")
        print(" ")
        print("|===============================================|")
        print("|===============================================|")
        print("| {0:>8s} |".format(sta.station))
        print("|===============================================|")
        print("|===============================================|")
        print("| Station: {0:>2s}.{1:5s} |".format(sta.network, sta.station))
        print("| Channel: {0:2s}; Locations: {1:15s} |".format(
            sta.channel, ",".join(tlocs)))
        print("| Lon: {0:7.2f}; Lat: {1:6.2f} |".format(
            sta.longitude, sta.latitude))
        print("| Start time: {0:19s} |".format(
            sta.startdate.strftime("%Y-%m-%d %H:%M:%S")))
        print("| End time: {0:19s} |".format(
            sta.enddate.strftime("%Y-%m-%d %H:%M:%S")))
        print("|-----------------------------------------------|")

        rfRstream = Stream()
        rfTstream = Stream()

        for folder in os.listdir(datapath):

            # Skip hidden folders
            if folder.startswith('.'):
                continue

            date = folder.split('_')[0]
            year = date[0:4]
            month = date[4:6]
            day = date[6:8]
            dateUTC = UTCDateTime(year + '-' + month + '-' + day)

            if dateUTC > tstart and dateUTC < tend:
                filename = datapath + "/" + folder + "/RF_Data.pkl"
                if os.path.isfile(filename):
                    file = open(filename, "rb")
                    rfdata = pickle.load(file)
                    # Keep only high-quality receiver functions. The middle
                    # clause was truncated in the original ("stats.snr and");
                    # comparing against opts.snr is the likely intent.
                    if rfdata[0].stats.snrh > opts.snrh and \
                            rfdata[0].stats.snr > opts.snr and \
                            rfdata[0].stats.cc > opts.cc:
                        rfRstream.append(rfdata[1])
                        rfTstream.append(rfdata[2])
                    file.close()
            else:
                continue

        if opts.no_outl:
            # Remove outliers with respect to variance
            varR = np.array([np.var(tr.data) for tr in rfRstream])

            # Flag robust outliers via the median absolute deviation
            medvarR = np.median(varR)
            madvarR = 1.4826 * np.median(np.abs(varR - medvarR))
            robustR = np.abs((varR - medvarR) / madvarR)
            outliersR = np.arange(len(rfRstream))[robustR > 2.]
            for i in outliersR[::-1]:
                rfRstream.remove(rfRstream[i])
                rfTstream.remove(rfTstream[i])

            # Do the same for the transverse component
            varT = np.array([np.var(tr.data) for tr in rfTstream])
            medvarT = np.median(varT)
            madvarT = 1.4826 * np.median(np.abs(varT - medvarT))
            robustT = np.abs((varT - medvarT) / madvarT)
            outliersT = np.arange(len(rfTstream))[robustT > 2.]
            for i in outliersT[::-1]:
                rfRstream.remove(rfRstream[i])
                rfTstream.remove(rfTstream[i])

        # Try binning if specified
        if opts.nbin is not None:
            rf_tmp = binning.bin(rfRstream, rfTstream,
                                 typ='baz', nbin=opts.nbin + 1)
            rfRstream = rf_tmp[0]
            rfTstream = rf_tmp[1]

        # Filter the original streams
        rfRstream.filter('bandpass', freqmin=opts.bp[0],
                         freqmax=opts.bp[1], corners=2, zerophase=True)
        rfTstream.filter('bandpass', freqmin=opts.bp[0],
                         freqmax=opts.bp[1], corners=2, zerophase=True)

        # Initialize the Harmonics object
        harmonics = Harmonics(rfRstream, rfTstream)

        # Stack with or without dip
        if opts.find_azim:
            harmonics.dcomp_find_azim(xmin=opts.trange[0],
                                      xmax=opts.trange[1])
            print("Optimal azimuth for trange between " +
                  str(opts.trange[0]) + " and " + str(opts.trange[1]) +
                  " is: " + str(harmonics.azim))
        else:
            harmonics.dcomp_fix_azim(azim=opts.azim)

        if opts.plot:
            harmonics.plot(opts.ymax, opts.scale,
                           opts.save_plot, opts.title, opts.form)

        if opts.save:
            # The original referenced an undefined hkstack object and never
            # used the computed filename; build it from the radial stream
            # and pass it to save().
            filename = (datapath + "/" + rfRstream[0].stats.station +
                        ".harmonics.pkl")
            harmonics.save(filename)
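# The variance-based outlier rejection above, factored into a helper. This
# is a sketch only: the function name and the threshold default (2.0, the
# hard-coded value above) are choices made for the example, not part of the
# original script.
import numpy as np

def mad_outlier_indices(stream, threshold=2.0):
    """Return indices of traces whose data variance is a robust outlier.

    Uses the median absolute deviation (MAD), scaled by 1.4826 so that it
    estimates the standard deviation for normally distributed data. Assumes
    the MAD is non-zero (i.e. the variances are not all identical).
    """
    var = np.array([np.var(tr.data) for tr in stream])
    med = np.median(var)
    mad = 1.4826 * np.median(np.abs(var - med))
    robust = np.abs((var - med) / mad)
    return np.arange(len(stream))[robust > threshold]

# Removing in reverse keeps the earlier indices valid, as in the loop above:
# for i in mad_outlier_indices(rfRstream)[::-1]:
#     rfRstream.remove(rfRstream[i])
#     rfTstream.remove(rfTstream[i])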