def get_metadata_for_file(self, absolute_filename):
    """
    Returns the metadata for a certain file.

    :param absolute_filename: The absolute path of the file.
    """
    if os.path.commonprefix([absolute_filename, self._data_folder]) == \
            self._data_folder:
        relpath = os.path.relpath(absolute_filename, self._data_folder)
        event, type_or_tag, filename = relpath.split(os.path.sep)
        if type_or_tag == "raw":
            c = self.get_waveform_cache(event, "raw")
        else:
            c = self.get_waveform_cache(event, "processed", type_or_tag)
    elif os.path.commonprefix(
            [absolute_filename, self._synthetics_folder]) == \
            self._synthetics_folder:
        relpath = os.path.relpath(absolute_filename,
                                  self._synthetics_folder)
        event, iteration, filename = relpath.split(os.path.sep)
        c = self.get_waveform_cache(event, "synthetic", iteration)
    else:
        raise LASIFError("Invalid path.")
    return c.get_details(absolute_filename)
def __init__(self, project_root_path, init_project=False,
             read_only_caches=False):
    """
    Upon initialization, set the paths and read the config file.

    :type project_root_path: str
    :param project_root_path: The root path of the project.
    :type init_project: str
    :param init_project: Determines whether or not to initialize a new
        project, e.g. create the necessary folder structure. If a string
        is passed, the project will be given this name. Otherwise a
        default name will be chosen. Defaults to False.
    :type read_only_caches: bool
    :param read_only_caches: If True, all caches are read-only. This is
        important for concurrent access as otherwise you might end up
        with race conditions. Make sure to build all necessary caches
        before enabling this, otherwise LASIF will not find all files it
        requires to work.
    """
    # Setup the paths.
    self.__setup_paths(project_root_path)

    if init_project:
        if read_only_caches:
            raise ValueError("Cannot initialize a project with disabled "
                             "cache-writes.")
        if not os.path.exists(project_root_path):
            os.makedirs(project_root_path)
        self.__init_new_project(init_project)

    # Project wide flag if the caches are read_only.
    self.read_only_caches = bool(read_only_caches)

    if not os.path.exists(self.paths["config_file"]):
        msg = ("Could not find the project's config file. Wrong project "
               "path or uninitialized project?")
        raise LASIFError(msg)

    self.__project_function_cache = {}

    # Setup the communicator and register this component.
    self.__comm = Communicator()
    super(Project, self).__init__(self.__comm, "project")

    # Setup the different components. The CACHE folder must already be
    # present.
    if not os.path.exists(self.paths["cache"]):
        os.makedirs(self.paths["cache"])
    self.__setup_components()

    # Finally update the folder structure.
    self.__update_folder_structure()

    self._read_config_file()
    self.__copy_fct_templates(init_project=init_project)
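# A minimal usage sketch of the constructor above, assuming `Project` is
# importable from lasif.components.project; the path and project name are
# placeholders.
from lasif.components.project import Project

# Create a brand-new project (this builds the folder structure).
proj = Project("/tmp/lasif_example", init_project="MyProject")

# Later, open the existing project with read-only caches for concurrent
# access; build all necessary caches before enabling this.
proj = Project("/tmp/lasif_example", read_only_caches=True)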
def create_new_iteration(self, iteration_name, solver_name, events_dict,
                         min_period, max_period,
                         seconds_prior_arrival=5.,
                         window_length_in_sec=50., quiet=False,
                         create_folders=True):
    """
    Creates a new iteration XML file.

    :param iteration_name: The name of the iteration.
    :param solver_name: The name of the solver to be used for the new
        iteration.
    :param events_dict: A dictionary specifying the used events.
    :param min_period: The minimum period in seconds for the new
        iteration.
    :param max_period: The maximum period in seconds for the new
        iteration.
    :param seconds_prior_arrival: Number of seconds before the
        theoretical phase arrival time used to window seismograms for
        quality control. Defaults to 5.
    :param window_length_in_sec: Length in seconds of the time window
        used to window seismograms for quality control. Defaults to 50.
    :param quiet: Do not print anything if set to `True`.
    :param create_folders: Create the folders for this iteration's
        synthetic waveforms.

    >>> comm = getfixture('iterations_comm')
    >>> comm.iterations.has_iteration("3")
    False
    >>> comm.iterations.create_new_iteration("3", "ses3d_4_1",
    ...     {"EVENT_1": ["AA.BB", "CC.DD"], "EVENT_2": ["EE.FF"]},
    ...     10.0, 20.0, quiet=True, create_folders=False)
    >>> comm.iterations.has_iteration("3")
    True
    >>> os.remove(comm.iterations.get_iteration_dict()["3"])
    """
    iteration_name = str(iteration_name)
    if iteration_name in self.get_iteration_dict():
        msg = "Iteration %s already exists." % iteration_name
        raise LASIFError(msg)

    from lasif.iteration_xml import create_iteration_xml_string
    xml_string = create_iteration_xml_string(
        iteration_name, solver_name, events_dict, min_period, max_period,
        seconds_prior_arrival, window_length_in_sec, quiet=quiet)
    with open(self.get_filename_for_iteration(iteration_name), "wt") \
            as fh:
        fh.write(xml_string)

    if create_folders:
        self.create_synthetics_folder_for_iteration(iteration_name)
        self.create_stf_folder_for_iteration(iteration_name)
def get_project_function(self, fct_type):
    """
    Helper importing the project specific function.

    :param fct_type: The desired function.
    """
    # Cache to avoid repeated imports.
    if fct_type in self.__project_function_cache:
        return self.__project_function_cache[fct_type]

    # type / filename map
    fct_type_map = {
        "window_picking_function": "window_picking_function.py",
        "preprocessing_function": "preprocessing_function.py",
        "data_svd_selection": "data_svd_selection.py",
        "process_synthetics": "process_synthetics.py",
        "source_time_function": "source_time_function.py",
        "instaseis_synthetics_function":
            "instaseis_synthetics_function.py",
        "stf_deconvolution": "stf_deconvolution.py",
    }

    if fct_type not in fct_type_map:
        msg = "Function '%s' not found. Available types: %s" % (
            fct_type, str(list(fct_type_map.keys())))
        raise LASIFNotFoundError(msg)

    filename = os.path.join(self.paths["functions"],
                            fct_type_map[fct_type])
    if not os.path.exists(filename):
        msg = "No file '%s' in existence." % filename
        raise LASIFNotFoundError(msg)

    fct_template = imp.load_source("_lasif_fct_template", filename)
    try:
        fct = getattr(fct_template, fct_type)
    except AttributeError:
        raise LASIFNotFoundError(
            "Could not find function %s in file '%s'" %
            (fct_type, filename))

    if not callable(fct):
        raise LASIFError("Attribute %s in file '%s' is not a function." %
                         (fct_type, filename))

    # Add to cache.
    self.__project_function_cache[fct_type] = fct
    return fct
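# A hedged usage sketch of the lookup above; `comm` is a hypothetical
# Communicator of an existing project whose FUNCTIONS folder contains
# preprocessing_function.py.
fct = comm.project.get_project_function("preprocessing_function")
# The returned object is the plain function defined in that file and can
# be called directly, e.g. fct(processing_info, iteration).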
def plot_Q_model(self, iteration_name):
    """
    Plots the Q model for a given iteration. Will only work if the
    iteration uses SES3D as its solver.
    """
    from lasif.tools.Q_discrete import plot

    iteration = self.get(iteration_name)
    if iteration.solver_settings["solver"].lower() != "ses3d 4.1":
        msg = "Only works for SES3D 4.1"
        raise LASIFError(msg)

    proc_params = iteration.get_process_params()
    f_min = proc_params["highpass"]
    f_max = proc_params["lowpass"]

    relax = iteration.solver_settings["solver_settings"][
        "relaxation_parameter_list"]
    tau_p = relax["tau"]
    weights = relax["w"]

    plot(D_p=weights, tau_p=tau_p, f_min=f_min, f_max=f_max)
def _get_default_solver_settings(solver, min_period, max_period,
                                 quiet=False):
    """
    Helper function returning the etree representation of a solver's
    default settings.

    :param quiet: Do not print anything if set to `True`.
    """
    known_solvers = ["ses3d_4_1", "ses3d_2_0", "specfem3d_cartesian",
                     "specfem3d_globe_cem"]
    if solver.lower() == "ses3d_4_1":
        from lasif.tools import Q_discrete
        from lasif.utils import generate_ses3d_4_1_template

        # Generate the relaxation weights for SES3D.
        w_p, tau_p = Q_discrete.calculate_Q_model(
            N=3,
            # These are suitable for the default frequency range.
            f_min=1.0 / max_period,
            f_max=1.0 / min_period,
            iterations=10000,
            initial_temperature=0.1,
            cooling_factor=0.9998, quiet=quiet)

        return generate_ses3d_4_1_template(w_p, tau_p)
    elif solver.lower() == "ses3d_2_0":
        from lasif.utils import generate_ses3d_2_0_template
        return generate_ses3d_2_0_template()
    elif solver.lower() == "specfem3d_cartesian":
        from lasif.utils import generate_specfem3d_cartesian_template
        return generate_specfem3d_cartesian_template()
    elif solver.lower() == "specfem3d_globe_cem":
        from lasif.utils import generate_specfem3d_globe_cem_template
        return generate_specfem3d_globe_cem_template()
    else:
        msg = "Solver '%s' not known. Known solvers: %s" % (
            solver, ", ".join(known_solvers))
        raise LASIFError(msg)
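# A hedged usage sketch of the dispatch above; the 10-100 s band is an
# arbitrary example, not a LASIF default.
settings = _get_default_solver_settings(
    solver="ses3d_4_1", min_period=10.0, max_period=100.0, quiet=True)
# `settings` is the solver's etree template with the SES3D relaxation
# weights and times (w_p, tau_p) filled in.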
def what_is(self, path):
    """
    Debug function returning a string with information about the file.
    Useful as a debug function and to figure out what LASIF is doing.

    :param path: The path to the file.
    """
    path = os.path.normpath(os.path.abspath(path))

    # File does not exist.
    if not os.path.exists(path):
        raise LASIFNotFoundError("Path '%s' does not exist." % path)
    # File not part of the project.
    if os.path.commonprefix([path, self.comm.project.paths["root"]]) \
            != self.comm.project.paths["root"]:
        raise LASIFError("File '%s' is not part of the LASIF project." %
                         path)

    # Branch on directory versus file to ease the rest.
    if os.path.isdir(path):
        return self.__what_is_this_folder(path)
    else:
        return self.__what_is_this_file(path)
def plot_events(self, plot_type="map"):
    """
    Plots the domain and beachballs for all events on the map.

    :param plot_type: Determines the type of plot created.

        * ``map`` (default) - a map view of the events
        * ``depth`` - a depth distribution histogram
        * ``time`` - a time distribution histogram
    """
    from lasif import visualization

    events = self.comm.events.get_all_events().values()

    if plot_type == "map":
        m = self.comm.project.domain.plot()
        visualization.plot_events(events, map_object=m)
    elif plot_type == "depth":
        visualization.plot_event_histogram(events, "depth")
    elif plot_type == "time":
        visualization.plot_event_histogram(events, "time")
    else:
        msg = "Unknown plot_type"
        raise LASIFError(msg)
def preprocessing_function(processing_info, iteration):  # NOQA
    """
    Function to perform the actual preprocessing for one individual
    seismogram. This is part of the project so it can change depending on
    the project.

    Please keep in mind that you will have to manually update this file to
    a new version if LASIF is ever updated.

    You can do whatever you want in this function as long as the function
    signature is honored. The file is read from ``"input_filename"`` and
    written to ``"output_filename"``.

    One goal of this function is to make sure that the data is available
    at the same time steps as the synthetics. The first time sample of the
    synthetics will always be the origin time of the event. Furthermore
    the data has to be converted to m/s.

    :param processing_info: A dictionary containing information about the
        file to be processed. It will have the following structure.
    :type processing_info: dict

    .. code-block:: python

        {'event_information': {
            'depth_in_km': 22.0,
            'event_name': 'GCMT_event_VANCOUVER_ISLAND...',
            'filename': '/.../GCMT_event_VANCOUVER_ISLAND....xml',
            'latitude': 49.53,
            'longitude': -126.89,
            'm_pp': 2.22e+18,
            'm_rp': -2.78e+18,
            'm_rr': -6.15e+17,
            'm_rt': 1.98e+17,
            'm_tp': 5.14e+18,
            'm_tt': -1.61e+18,
            'magnitude': 6.5,
            'magnitude_type': 'Mwc',
            'origin_time': UTCDateTime(2011, 9, 9, 19, 41, 34, 200000),
            'region': u'VANCOUVER ISLAND, CANADA REGION'},
        'input_filename': u'/.../raw/7D.FN01A..HHZ.mseed',
        'output_filename': u'/.../processed_.../7D.FN01A..HHZ.mseed',
        'process_params': {
            'dt': 0.75,
            'highpass': 0.007142857142857143,
            'lowpass': 0.0125,
            'npts': 2000},
        'station_coordinates': {
            'elevation_in_m': -54.0,
            'latitude': 46.882,
            'local_depth_in_m': None,
            'longitude': -124.3337},
        'station_filename': u'/.../STATIONS/RESP/RESP.7D.FN01A..HH*'}

    Please note that you also get the iteration object here, so if you
    want some parameters to change depending on the iteration, just use
    if/else on the iteration objects.

    >>> iteration.name  # doctest: +SKIP
    '11'
    >>> iteration.get_process_params()  # doctest: +SKIP
    {'dt': 0.75, 'highpass': 0.01, 'lowpass': 0.02, 'npts': 500}

    Use ``$ lasif shell`` to play around and figure out what the iteration
    objects can do.
    """
    def zerophase_chebychev_lowpass_filter(trace, freqmax):
        """
        Custom Chebychev type two zerophase lowpass filter useful for
        decimation filtering.

        This filter is stable up to a reduction in frequency with a factor
        of 10. If more reduction is desired, simply decimate in steps.

        Partly based on a filter in ObsPy.

        :param trace: The trace to be filtered.
        :param freqmax: The desired lowpass frequency.

        Will be replaced once ObsPy has a proper decimation filter.
        """
        # rp - maximum ripple of passband, rs - attenuation of stopband
        rp, rs, order = 1, 96, 1e99
        ws = freqmax / (trace.stats.sampling_rate * 0.5)  # stop band freq.
        wp = ws  # pass band frequency

        while True:
            if order <= 12:
                break
            wp *= 0.99
            order, wn = signal.cheb2ord(wp, ws, rp, rs, analog=0)

        b, a = signal.cheby2(order, rs, wn, btype="low", analog=0,
                             output="ba")

        # Apply twice to get rid of the phase distortion.
        trace.data = signal.filtfilt(b, a, trace.data)

    def signal_to_noise_ratio(data, first_tt_arrival, process_params):
        minimum_period = 1. / process_params["highpass"]
        dt = process_params["dt"]

        # Estimate noise level from waveforms prior to the first arrival.
        idx_noise_end = int(
            np.ceil((first_tt_arrival - 0.5 * minimum_period) / dt)) - 1
        idx_noise_end = max(10, idx_noise_end)
        idx_noise_start = int(
            np.ceil((first_tt_arrival - 5 * minimum_period) / dt))
        idx_noise_start = max(10, idx_noise_start)

        idx_sigwin_start = int(
            np.ceil((first_tt_arrival - 0.5 * minimum_period) / dt))
        idx_sigwin_end = idx_sigwin_start + int(minimum_period / dt)

        if idx_noise_start >= idx_noise_end:
            idx_noise_start = max(10, idx_noise_end - 10)

        abs_data = np.abs(data[idx_sigwin_start:idx_sigwin_end])
        noise_absolute = np.abs(data[idx_noise_start:idx_noise_end]).max()
        noise_relative = noise_absolute / abs_data.max()

        return noise_relative, noise_absolute

    # =========================================================================
    # Define noise_threshold by default if not given in argument.
    # =========================================================================
    if processing_info["noise_threshold"] is None:
        noise_threshold = 0.1
    else:
        noise_threshold = processing_info["noise_threshold"]

    # =========================================================================
    # Read seismograms and gather basic information.
    # =========================================================================
    starttime = processing_info["event_information"]["origin_time"]
    endtime = starttime + processing_info["process_params"]["dt"] * \
        (processing_info["process_params"]["npts"] - 1)
    duration = endtime - starttime

    st = obspy.read(processing_info["input_filename"])

    if len(st) != 1:
        warnings.warn("The file '%s' has %i traces and not 1. "
                      "Skip all but the first" % (
                          processing_info["input_filename"], len(st)))
    tr = st[0]

    # Fill the data file header with station coordinates.
    receiver = processing_info["station_coordinates"]
    tr.stats.coordinates = AttribDict({
        'latitude': receiver["latitude"],
        'elevation': receiver["elevation_in_m"],
        'longitude': receiver["longitude"]
    })

    # Make sure the seismograms are long enough. If not, skip them.
    if starttime < tr.stats.starttime or endtime > tr.stats.endtime:
        msg = ("The seismogram does not cover the required time span.\n"
               "Seismogram time span: %s - %s\n"
               "Requested time span: %s - %s" % (
                   tr.stats.starttime, tr.stats.endtime,
                   starttime, endtime))
        print(msg)
        raise LASIFError(msg)

    # Trim to reduce processing cost.
    # starttime is the origin time of the event.
    # endtime is the origin time plus the length of the synthetics.
    tr.trim(starttime - 0.2 * duration, endtime + 0.2 * duration)

    # =========================================================================
    # Some basic checks on the data.
    # =========================================================================
    # Non-zero length.
    if not len(tr):
        msg = "No data found in time window around the event. File skipped."
        raise LASIFError(msg)

    # No NaNs or infinity values allowed.
    if not np.isfinite(tr.data).all():
        msg = "Data contains NaNs or Infs. File skipped."
        raise LASIFError(msg)

    # =========================================================================
    # Step 1: Decimation
    # Decimate with the factor closest to the sampling rate of the
    # synthetics. The data is still oversampled by a large amount so there
    # should be no problems. This has to be done here so that the
    # instrument correction is reasonably fast even for input data with a
    # large sampling rate.
    # =========================================================================
    while True:
        decimation_factor = int(processing_info["process_params"]["dt"] /
                                tr.stats.delta)
        # Decimate in steps for large sample rate reductions.
        if decimation_factor > 8:
            decimation_factor = 8
        if decimation_factor > 1:
            new_nyquist = tr.stats.sampling_rate / 2.0 / float(
                decimation_factor)
            zerophase_chebychev_lowpass_filter(tr, new_nyquist)
            tr.decimate(factor=decimation_factor, no_filter=True)
        else:
            break

    # =========================================================================
    # Step 2: Detrend and taper.
    # =========================================================================
    tr.detrend("linear")
    tr.detrend("demean")
    tr.taper(max_percentage=0.05, type="hann")

    # =========================================================================
    # Step 3: Instrument correction
    # Correct seismograms to velocity in m/s.
    # =========================================================================
    output_units = "VEL"
    station_file = processing_info["station_filename"]

    # Check if the station file actually exists. =============================
    if not processing_info["station_filename"]:
        msg = "No station file found for the relevant time span. File skipped"
        raise LASIFError(msg)

    # This is really necessary as other filters are just not sharp enough
    # and lots of energy from other frequency bands leaks into the
    # frequency band of interest.
    freqmin = processing_info["process_params"]["highpass"]
    freqmax = processing_info["process_params"]["lowpass"]

    f2 = 0.9 * freqmin
    f3 = 1.1 * freqmax
    # Recommendations from the SAC manual.
    f1 = 0.5 * f2
    f4 = 2.0 * f3
    pre_filt = (f1, f2, f3, f4)

    # Processing for SEED files. =============================================
    if "/SEED/" in station_file:
        # XXX: Check if this is m/s. In all cases encountered so far it
        # always is, but SEED is in theory also able to specify corrections
        # to other units...
        parser = Parser(station_file)
        try:
            # The simulate might fail but might still modify the data. The
            # backup is needed for the backup plan to only correct using
            # poles and zeros.
            backup_tr = tr.copy()
            try:
                tr.simulate(seedresp={
                    "filename": parser,
                    "units": output_units,
                    "date": tr.stats.starttime
                }, pre_filt=pre_filt, zero_mean=False, taper=False)
            except ValueError:
                warnings.warn("Evalresp failed, will only use the Poles "
                              "and Zeros stage")
                tr = backup_tr
                paz = parser.get_paz(tr.id, tr.stats.starttime)
                if paz["sensitivity"] == 0:
                    warnings.warn("Sensitivity is 0 in SEED file and will "
                                  "not be taken into account!")
                    tr.simulate(paz_remove=paz, remove_sensitivity=False,
                                pre_filt=pre_filt, zero_mean=False,
                                taper=False)
                else:
                    tr.simulate(paz_remove=paz, pre_filt=pre_filt,
                                zero_mean=False, taper=False)
        except Exception as e:
            msg = ("File could not be corrected with the help of the "
                   "SEED file '%s'. Will be skipped due to: %s") \
                % (processing_info["station_filename"], str(e))
            raise LASIFError(msg)
    # Processing with RESP files. ============================================
    elif "/RESP/" in station_file:
        try:
            tr.simulate(seedresp={
                "filename": station_file,
                "units": output_units,
                "date": tr.stats.starttime
            }, pre_filt=pre_filt, zero_mean=False, taper=False)
        except ValueError as e:
            msg = ("File could not be corrected with the help of the "
                   "RESP file '%s'. Will be skipped due to: %s") \
                % (processing_info["station_filename"], str(e))
            raise LASIFError(msg)
    elif "/StationXML/" in station_file:
        try:
            inv = obspy.read_inventory(station_file, format="stationxml")
        except Exception as e:
            msg = ("Could not open StationXML file '%s'. Due to: %s. "
                   "Will be skipped."
                   % (station_file, str(e)))
            raise LASIFError(msg)
        tr.attach_response(inv)
        try:
            tr.remove_response(output=output_units, pre_filt=pre_filt,
                               zero_mean=False, taper=False)
        except Exception as e:
            msg = ("File could not be corrected with the help of the "
                   "StationXML file '%s'. Due to: '%s' Will be skipped.") \
                % (processing_info["station_filename"], e.__repr__())
            raise LASIFError(msg)
    else:
        raise NotImplementedError

    # =========================================================================
    # Step 4: Bandpass filtering
    # This has to be exactly the same filter as in the source time function
    # in the case of SES3D.
    # =========================================================================
    tr.detrend("linear")
    tr.detrend("demean")
    tr.taper(0.05, type="cosine")
    tr.filter("bandpass", freqmin=freqmin, freqmax=freqmax, corners=3,
              zerophase=False)
    tr.detrend("linear")
    tr.detrend("demean")
    tr.taper(0.05, type="cosine")
    tr.filter("bandpass", freqmin=freqmin, freqmax=freqmax, corners=3,
              zerophase=False)

    # =========================================================================
    # Step 5: Sinc interpolation
    # =========================================================================
    # Make sure that the data array is at least as long as the synthetics
    # array.
    tr.interpolate(
        sampling_rate=1.0 / processing_info["process_params"]["dt"],
        method="lanczos", starttime=starttime, window="blackman", a=12,
        npts=processing_info["process_params"]["npts"])

    # =========================================================================
    # Step 6: Waveform selection based on SNR
    # =========================================================================
    # Compute the relative noise level.
    snr = signal_to_noise_ratio(tr.data,
                                processing_info["first_P_arrival"],
                                processing_info["process_params"])[0]

    # Only save the waveform if the relative noise level is below the
    # threshold.
    if snr < noise_threshold:
        # =====================================================================
        # Save processed data and clean up.
        # =====================================================================
        # Convert to single precision to save some space.
        tr.data = np.require(tr.data, dtype="float32", requirements="C")
        if hasattr(tr.stats, "mseed"):
            tr.stats.mseed.encoding = "FLOAT32"

        tr.write(processing_info["output_filename"],
                 format=tr.stats._format)
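# Standalone sketch of the four-corner pre-filter construction used in
# Step 3 above, following the SAC manual recommendations. The 10-80 s
# passband is an arbitrary example, not a LASIF default.
min_period, max_period = 10.0, 80.0
freqmin = 1.0 / max_period   # highpass corner of the passband
freqmax = 1.0 / min_period   # lowpass corner of the passband

f2 = 0.9 * freqmin           # taper starts just below the passband
f3 = 1.1 * freqmax           # taper ends just above the passband
f1 = 0.5 * f2                # outer corners from the SAC manual
f4 = 2.0 * f3
pre_filt = (f1, f2, f3, f4)  # passed to simulate() / remove_response()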
def preprocessing_function_asdf(dir_obs, eventname, time_increment,
                                end_time, min_period, max_period):
    processdir = eventname + 'preprocessed'
    processdata = ('preprocessed_' + str(min_period) + 's_to_' +
                   str(max_period) + 's.h5')
    tag_name = ('preprocessed_' + str(min_period) + 's_to_' +
                str(max_period) + 's')
    if os.path.exists(processdir + '/' + processdata):
        os.system('rm -rf ' + processdir + '/' + processdata)
    # os.makedirs(processdir)
    os.system('cp ' + dir_obs + ' ' + processdir + '/' + processdata)

    def zerophase_chebychev_lowpass_filter(trace, freqmax):
        """
        Custom Chebychev type two zerophase lowpass filter useful for
        decimation filtering.

        This filter is stable up to a reduction in frequency with a factor
        of 10. If more reduction is desired, simply decimate in steps.

        Partly based on a filter in ObsPy.

        :param trace: The trace to be filtered.
        :param freqmax: The desired lowpass frequency.

        Will be replaced once ObsPy has a proper decimation filter.
        """
        # rp - maximum ripple of passband, rs - attenuation of stopband
        rp, rs, order = 1, 96, 1e99
        ws = freqmax / (trace.stats.sampling_rate * 0.5)  # stop band freq.
        wp = ws  # pass band frequency

        while True:
            if order <= 12:
                break
            wp *= 0.99
            order, wn = signal.cheb2ord(wp, ws, rp, rs, analog=0)

        b, a = signal.cheby2(order, rs, wn, btype="low", analog=0,
                             output="ba")

        # Apply twice to get rid of the phase distortion.
        trace.data = signal.filtfilt(b, a, trace.data)

    # =========================================================================
    # Read ASDF file
    # =========================================================================
    ds = pyasdf.ASDFDataSet(processdir + '/' + processdata)
    station_list = ds.waveforms.list()
    event = ds.events[0]

    dt = time_increment
    sampling_rate = 1.0 / dt
    start_time = -time_increment
    npts = int(round((end_time - start_time) / time_increment) + 1)

    origin = event.preferred_origin() or event.origins[0]
    start = UTCDateTime(origin.time)
    # Event-relative start time, expressed as a POSIX timestamp.
    starttime = start_time + float(start)
    endtime = end_time + starttime
    duration = end_time - start_time

    f2 = 0.9 / max_period
    f3 = 1.1 / min_period
    # Recommendations from the SAC manual.
    f1 = 0.5 * f2
    f4 = 2.0 * f3
    pre_filt = (f1, f2, f3, f4)

    for _i, stid in enumerate(station_list):
        stla, stlo, evz = ds.waveforms[stid].coordinates.values()
        st = ds.waveforms[stid].raw_recording
        for tr in st:
            # Trim to reduce processing costs.
            # tr.trim(starttime - 0.2 * duration, endtime + 0.2 * duration)
            while True:
                decimation_factor = int(dt / tr.stats.delta)
                # Decimate in steps for large sample rate reductions.
                if decimation_factor > 8:
                    decimation_factor = 8
                if decimation_factor > 1:
                    new_nyquist = tr.stats.sampling_rate / 2.0 / float(
                        decimation_factor)
                    zerophase_chebychev_lowpass_filter(tr, new_nyquist)
                    tr.decimate(factor=decimation_factor, no_filter=True)
                else:
                    break

        inv = ds.waveforms[stid].StationXML

        # Detrend and taper.
        st.detrend("linear")
        st.detrend("demean")
        st.taper(max_percentage=0.05, type="hann")

        # Instrument correction.
        try:
            st.attach_response(inv)
            st.remove_response(output="DISP", pre_filt=pre_filt,
                               zero_mean=False, taper=False)
        except Exception as e:
            net = inv.get_contents()['channels'][0].split('.', 2)[0]
            sta = inv.get_contents()['channels'][0].split('.', 2)[1]
            msg = ("Station: %s.%s could not be corrected with the help of"
Due to: '%s' Will be skipped.") \ % (net, sta, processing_info["asdf_input_filename"], e.__repr__()), raise LASIFError(msg) # Bandpass filtering st.detrend("linear") st.detrend("demean") st.taper(0.05, type="cosine") st.filter("bandpass", freqmin=1.0 / max_period, freqmax=1.0 / min_period, corners=3, zerophase=False) st.detrend("linear") st.detrend("demean") st.taper(0.05, type="cosine") st.filter("bandpass", freqmin=1.0 / max_period, freqmax=1.0 / min_period, corners=3, zerophase=False) # Sinc interpolation for tr in st: tr.data = np.require(tr.data, requirements="C") st.interpolate(sampling_rate=sampling_rate, method="lanczos", starttime=starttime, window="blackman", a=12, npts=npts) # Convert to single precision to save space. for tr in st: tr.data = np.require(tr.data, dtype="float32", requirements="C") ds.add_waveforms(st,tag=tag_name) del ds.waveforms[stid].raw_recording del ds.waveforms[stid].preprocess
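# A hedged invocation sketch of preprocessing_function_asdf; the paths,
# event name, and the 30-120 s band are placeholders, and the exact layout
# of `dir_obs` relative to `eventname` is an assumption.
preprocessing_function_asdf(
    dir_obs="GCMT_event_EXAMPLE/raw_observed.h5",
    eventname="GCMT_event_EXAMPLE/",
    time_increment=0.5,  # dt of the synthetics in seconds
    end_time=1000.0,     # length of the synthetics in seconds
    min_period=30.0,
    max_period=120.0)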
def get_matching_waveforms(self, event, iteration, station_or_channel_id):
    seed_id = station_or_channel_id.split(".")
    if len(seed_id) == 2:
        channel = None
        station_id = station_or_channel_id
    elif len(seed_id) == 4:
        network, station, _, channel = seed_id
        station_id = ".".join((network, station))
    else:
        raise ValueError("'station_or_channel_id' must either have "
                         "2 or 4 parts.")

    iteration = self.comm.iterations.get(iteration)
    event = self.comm.events.get(event)

    # Get the metadata for the processed and synthetics for this
    # particular station.
    data = self.comm.waveforms.get_waveforms_processed(
        event["event_name"], station_id, tag=iteration.processing_tag)
    synthetics = self.comm.waveforms.get_waveforms_synthetic(
        event["event_name"], station_id,
        long_iteration_name=iteration.long_name)
    coordinates = self.comm.query.get_coordinates_for_station(
        event["event_name"], station_id)

    # Clean up the data and synthetics: keep one trace per component.
    for _st, name in ((data, "observed"), (synthetics, "synthetic")):
        # Get all components and loop over all components.
        _comps = set(tr.stats.channel[-1].upper() for tr in _st)
        for _c in _comps:
            traces = [_i for _i in _st
                      if _i.stats.channel[-1].upper() == _c]
            if len(traces) == 1:
                continue
            elif len(traces) > 1:
                traces = sorted(traces, key=lambda x: x.id)
                warnings.warn(
                    "%s data for event '%s', iteration '%s', "
                    "station '%s', and component '%s' has %i traces: "
                    "%s. LASIF will select the first one, but please "
                    "clean up your data." % (
                        name.capitalize(), event["event_name"],
                        iteration.iteration_name, station_id, _c,
                        len(traces), ", ".join(tr.id for tr in traces)),
                    LASIFWarning)
                for tr in traces[1:]:
                    _st.remove(tr)
            else:
                # Should not happen.
                raise NotImplementedError

    # Make sure all data has the corresponding synthetics. It should not
    # happen that one has three channels of data but only two channels
    # of synthetics...in that case, discard the additional data and
    # raise a warning.
    temp_data = []
    for data_tr in data:
        component = data_tr.stats.channel[-1].upper()
        synthetic_tr = [tr for tr in synthetics
                        if tr.stats.channel[-1].upper() == component]
        if not synthetic_tr:
            warnings.warn(
                "Station '%s' has observed data for component '%s' but "
                "no matching synthetics." % (station_id, component),
                LASIFWarning)
            continue
        temp_data.append(data_tr)
    data.traces = temp_data

    if len(data) == 0:
        raise LASIFError("No data remaining for station '%s'." %
                         station_id)

    # Scale the data if required.
    if iteration.scale_data_to_synthetics:
        for data_tr in data:
            synthetic_tr = [
                tr for tr in synthetics
                if tr.stats.channel[-1].lower() ==
                data_tr.stats.channel[-1].lower()][0]
            scaling_factor = synthetic_tr.data.ptp() / \
                data_tr.data.ptp()
            # Store and apply the scaling.
            data_tr.stats.scaling_factor = scaling_factor
            data_tr.data *= scaling_factor

    data.sort()
    synthetics.sort()

    # Select component if necessary.
    if channel:
        # Only use the last letter of the channel for the selection.
        # Different solvers have different conventions for the location
        # and channel codes.
        component = channel[-1].upper()
        data.traces = [i for i in data.traces
                       if i.stats.channel[-1].upper() == component]
        synthetics.traces = [i for i in synthetics.traces
                             if i.stats.channel[-1].upper() == component]

    return DataTuple(data=data, synthetics=synthetics,
                     coordinates=coordinates)
def get_inventory(resp_file, remove_duplicates=False):
    """
    Simple function reading a RESP file and returning a list of
    dictionaries. Each dictionary contains the following keys for each
    channel found in the RESP file:

    * network
    * station
    * location
    * channel
    * start_date
    * end_date
    * channel_id

    :param resp_file: RESP file to open.
    :param remove_duplicates: Some RESP files contain the same values
        twice. This option removes the duplicates. Defaults to False.
    """
    channels = []
    with open(resp_file, "r") as open_file:
        current_channel = {}
        for line in open_file:
            line = line.strip().upper()
            if line.startswith("B050F03"):
                current_channel["station"] = line.split()[-1]
            elif line.startswith("B050F16"):
                current_channel["network"] = line.split()[-1]
            elif line.startswith("B052F03"):
                location = line.split()[-1]
                if location == "??":
                    location = ""
                current_channel["location"] = location
            elif line.startswith("B052F04"):
                current_channel["channel"] = line.split()[-1]
            elif line.startswith("B052F22"):
                current_channel["start_date"] = \
                    _parse_resp_datetime_string(line.split()[-1])
            elif line.startswith("B052F23"):
                current_channel["end_date"] = \
                    _parse_resp_datetime_string(line.split()[-1])
            # As soon as all fields are set, store the channel and start
            # collecting the next one.
            if _is_channel_complete(current_channel):
                channels.append(current_channel)
                current_channel = {}

    for channel in channels:
        channel["channel_id"] = "{network}.{station}.{location}.{channel}"\
            .format(**channel)

    # Make unique list if requested.
    if remove_duplicates is True:
        unique_list = []
        for channel in channels:
            if channel in unique_list:
                continue
            unique_list.append(channel)
        channels = unique_list

    if not channels:
        raise LASIFError("'%s' is not a valid RESP file." % resp_file)

    return channels
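# A hedged usage sketch of get_inventory; the RESP path is a placeholder.
channels = get_inventory("STATIONS/RESP/RESP.7D.FN01A..HHZ",
                         remove_duplicates=True)
for chan in channels:
    # e.g. "7D.FN01A..HHZ" plus its validity window.
    print(chan["channel_id"], chan["start_date"], chan["end_date"])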
def stf_deconvolution(to_be_processed, output_folder,
                      components=['E', 'N', 'Z']):  # NOQA
    """
    Function to estimate the source time function of one event by
    deconvolving the synthetic Green's functions from the observed
    seismograms. This is part of the project so it can change depending
    on the project.

    Please keep in mind that you will have to manually update this file to
    a new version if LASIF is ever updated.

    You can do whatever you want in this function as long as the function
    signature is honored. For each waveform, the observed data is read
    from ``"input_filename"`` and the corresponding synthetic from
    ``"output_filename"``.

    :param to_be_processed: A list of dictionaries, one per waveform.
        Each carries a ``"processing_info"`` dictionary with the
        following structure.
    :type to_be_processed: list
    :param output_folder: The folder the estimated source time functions
        are written to.
    :param components: The components for which to estimate a source
        time function. Defaults to ``['E', 'N', 'Z']``.

    .. code-block:: python

        {'event_information': {
            'depth_in_km': 22.0,
            'event_name': 'GCMT_event_VANCOUVER_ISLAND...',
            'filename': '/.../GCMT_event_VANCOUVER_ISLAND....xml',
            'latitude': 49.53,
            'longitude': -126.89,
            'm_pp': 2.22e+18,
            'm_rp': -2.78e+18,
            'm_rr': -6.15e+17,
            'm_rt': 1.98e+17,
            'm_tp': 5.14e+18,
            'm_tt': -1.61e+18,
            'magnitude': 6.5,
            'magnitude_type': 'Mwc',
            'origin_time': UTCDateTime(2011, 9, 9, 19, 41, 34, 200000),
            'region': u'VANCOUVER ISLAND, CANADA REGION'},
        'input_filename': u'/.../raw/7D.FN01A..HHZ.mseed',
        'output_filename': u'/.../processed_.../7D.FN01A..HHZ.mseed',
        'process_params': {
            'dt': 0.75,
            'highpass': 0.007142857142857143,
            'lowpass': 0.0125,
            'npts': 2000},
        'station_coordinates': {
            'elevation_in_m': -54.0,
            'latitude': 46.882,
            'local_depth_in_m': None,
            'longitude': -124.3337},
        'station_filename': u'/.../STATIONS/RESP/RESP.7D.FN01A..HH*'}
""" def source_deconvolution_freq( stream_data, stream_green, lambd=0.001, recompute_syn=False): # Calculate STF: # deconvoluate the Green's functions from observed seismograms # following Pratt 1999, equation 17 nfft = stream_data[0].stats.npts num = np.zeros(nfft, dtype=complex) den = np.zeros(nfft, dtype=complex) chi_obs = [] for tr, sy in zip(stream_data, stream_green): tr_fft = np.fft.fft(tr.data, nfft) sy_fft = np.fft.fft(sy.data, nfft) num += np.conjugate(sy_fft) * tr_fft den += np.conjugate(sy_fft) * sy_fft chi_obs.append(np.sum(tr.data**2)) chi_obs = 0.5 * np.sum(chi_obs) water_level = lambd * np.max(np.abs(den)) s = num / (den + water_level) src = np.real(np.fft.ifft(s)) stream_src = obspy.Stream() stream_src += tr.copy() stream_src[0].stats.station = '' stream_src[0].data = src residual = [] stream_syn = obspy.Stream() # recompute synthetics with the estimated STF if recompute_syn: src_fft = np.fft.fft(src, nfft) chi_syn = [] for tr, sy in zip(stream_data, stream_green): sy_fft = np.fft.fft(sy.data, nfft) cal = sy.copy() cal.data = np.real(np.fft.ifft(src_fft * sy_fft)) stream_syn += cal res = tr.data - cal.data chi_syn.append(np.sum(res**2)) chi_syn = 0.5 * np.sum(chi_syn) residual = chi_syn / chi_obs return stream_src, stream_syn, residual # ========================================================================= # Entering the function # ========================================================================= from matplotlib.dates import date2num, num2date SECONDS_PER_DAY = 3600 * 24 process_params = to_be_processed[0]["processing_info"]["process_params"] seconds_prior_arrival = process_params["seconds_prior_arrival"] window_length_in_sec = process_params["window_length_in_sec"] for comp in components: # ========================================================================= # Component selection # ========================================================================= # !!!!!! 
        # to be calculated in preprocess_data and given in process_info.
        wav_file_list = [wav["processing_info"]["input_filename"]
                         for wav in to_be_processed
                         if comp in wav["processing_info"]["channel"]]
        syn_file_list = [wav["processing_info"]["output_filename"]
                         for wav in to_be_processed
                         if comp in wav["processing_info"]["channel"]]
        first_arrival = [wav["processing_info"]["first_P_arrival"]
                         for wav in to_be_processed
                         if comp in wav["processing_info"]["channel"]]

        idx_sigwin_start = int(np.ceil(
            (np.min(first_arrival) -
             process_params["seconds_prior_arrival"]) /
            process_params["dt"]))
        idx_sigwin_end = int(np.ceil(
            (np.max(first_arrival) +
             process_params["window_length_in_sec"]) /
            process_params["dt"]))

        Time = np.arange(0, process_params["npts"] * process_params["dt"],
                         process_params["dt"])
        starttime = to_be_processed[0][
            "processing_info"]["event_information"]["origin_time"]
        t_start = num2date((Time[idx_sigwin_start] / SECONDS_PER_DAY) +
                           date2num(starttime.datetime))
        t_end = num2date((Time[idx_sigwin_end] / SECONDS_PER_DAY) +
                         date2num(starttime.datetime))
        startdate = obspy.UTCDateTime(
            t_start.year, t_start.month, t_start.day, t_start.hour,
            t_start.minute, t_start.second, t_start.microsecond)
        enddate = obspy.UTCDateTime(
            t_end.year, t_end.month, t_end.day, t_end.hour,
            t_end.minute, t_end.second, t_end.microsecond)

        # =====================================================================
        # Read traces and window around the phase of interest.
        # =====================================================================
        st_wav = obspy.Stream()
        st_syn = obspy.Stream()
        for wav_file, syn_file in zip(wav_file_list, syn_file_list):
            wav = obspy.read(wav_file)
            syn = obspy.read(syn_file)
            wav.trim(startdate, enddate)
            syn.trim(startdate, enddate)
            # wav[0].data = wav[0].data[idx_sigwin_start:idx_sigwin_end]
            # syn[0].data = syn[0].data[idx_sigwin_start:idx_sigwin_end]
            wav[0].data /= np.max(wav[0].data)
            syn[0].data /= np.max(syn[0].data)
            st_wav += wav
            st_syn += syn

        # If no waveform was selected at the previous step (SNR criteria),
        # quit the process.
        if not st_wav or not st_syn:
            raise LASIFError(
                "No data for this event, will skip the stf estimation")
        else:
            st_wav.taper(0.01)
            st_syn.taper(0.01)

        # =====================================================================
        # STF deconvolution
        # =====================================================================
        # stf, new_syn, residual = source_deconvolution_freq(
        #     st_wav, st_syn, lambd=0.001, recompute_syn=True)
        stf, p, pp = source_deconvolution_freq(
            st_wav, st_syn, lambd=0.001, recompute_syn=False)

        '''
        src = obspy.read(wav_file_list[0])
        src[0].stats.station = ''
        src_trace = np.zeros(process_params["npts"], dtype=float)
        src_trace[idx_sigwin_start:idx_sigwin_end] = stf[0].data
        src[0].data = src_trace.copy()
        stf = src.copy()
        '''

        # =====================================================================
        # Write the stf file.
        # =====================================================================
        # Convert to single precision to save some space.
        tr = stf[0].copy()
        tr.data = np.require(tr.data, dtype="float32", requirements="C")
        tr.stats._format = wav[0].stats._format
        if hasattr(tr.stats, "mseed"):  # to be fixed
            tr.stats.mseed.encoding = "FLOAT32"

        # channel_id = [item["processing_info"]["channel"]
        #               for item in to_be_processed
        #               if comp in item["processing_info"]["channel"]][0]
        stf_filename = os.path.join(
            output_folder, "stf_%s__%s__%s" % (
                comp,
                to_be_processed[0]["processing_info"][
                    "output_filename"].split('/')[-1].split('__')[-2],
                to_be_processed[0]["processing_info"][
                    "output_filename"].split('/')[-1].split('__')[-1]))
        tr.write(stf_filename, format=tr.stats._format)
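# The core of source_deconvolution_freq above is a water-level-stabilized
# spectral division (Pratt 1999, eq. 17). A self-contained numpy sketch on
# made-up signals, purely for illustration:
import numpy as np

rng = np.random.default_rng(0)
npts = 512
g = rng.standard_normal(npts)  # stand-in for a Green's function
s_true = np.exp(-((np.arange(npts) - 50) / 5.0) ** 2)  # Gaussian wavelet
# Observation: the Green's function convolved with the source wavelet.
d = np.real(np.fft.ifft(np.fft.fft(g) * np.fft.fft(s_true)))

lambd = 0.001
g_fft = np.fft.fft(g, npts)
d_fft = np.fft.fft(d, npts)
num = np.conjugate(g_fft) * d_fft          # cross-spectrum
den = np.conjugate(g_fft) * g_fft          # Green's function power spectrum
water_level = lambd * np.max(np.abs(den))  # regularization floor
s_est = np.real(np.fft.ifft(num / (den + water_level)))

# s_est recovers s_true up to the regularization error.
print(np.abs(s_est - s_true).max())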