def run_as_update(self, options): """Updates data. Parameters ---------- options: dictionary The dictionary of all the command line arguments. Could in theory contain other options passed in by the controller. Notes ----- Finds gaps in the target data, and if there's new data in the input source, calls run with the start/end time of a given gap to fill in. It checks the start of the target data, and if it's missing, and there's new data available, it backs up the starttime/endtime, and recursively calls itself, to check the previous period, to see if new data is available there as well. Calls run for each new period, oldest to newest. """ algorithm = self._algorithm input_channels = options.inchannels or \ algorithm.get_input_channels() output_channels = options.outchannels or \ algorithm.get_output_channels() # request output to see what has already been generated output_timeseries = self._get_output_timeseries( observatory=options.observatory, starttime=options.starttime, endtime=options.endtime, channels=output_channels) delta = output_timeseries[0].stats.delta # find gaps in output, so they can be updated output_gaps = TimeseriesUtility.get_merged_gaps( TimeseriesUtility.get_stream_gaps(output_timeseries)) for output_gap in output_gaps: input_timeseries = self._get_input_timeseries( observatory=options.observatory, starttime=output_gap[0], endtime=output_gap[1], channels=input_channels) if not algorithm.can_produce_data( starttime=output_gap[0], endtime=output_gap[1], stream=input_timeseries): continue # check for fillable gap at start if output_gap[0] == options.starttime: # found fillable gap at start, recurse to previous interval interval = options.endtime - options.starttime starttime = options.starttime - interval - delta endtime = options.starttime - delta options.starttime = starttime options.endtime = endtime self.run_as_update(options) # fill gap options.starttime = output_gap[0] options.endtime = output_gap[1] self.run(options)
def run_as_update(self, options): """Updates data. Parameters ---------- options: dictionary The dictionary of all the command line arguments. Could in theory contain other options passed in by the controller. Notes ----- Finds gaps in the target data, and if there's new data in the input source, calls run with the start/end time of a given gap to fill in. It checks the start of the target data, and if it's missing, and there's new data available, it backs up the starttime/endtime, and recursively calls itself, to check the previous period, to see if new data is available there as well. Calls run for each new period, oldest to newest. """ algorithm = self._algorithm input_channels = options.inchannels or \ algorithm.get_input_channels() output_channels = options.outchannels or \ algorithm.get_output_channels() # request output to see what has already been generated output_timeseries = self._get_output_timeseries( observatory=options.observatory, starttime=options.starttime, endtime=options.endtime, channels=output_channels) delta = output_timeseries[0].stats.delta # find gaps in output, so they can be updated output_gaps = TimeseriesUtility.get_merged_gaps( TimeseriesUtility.get_stream_gaps(output_timeseries)) for output_gap in output_gaps: input_timeseries = self._get_input_timeseries( observatory=options.observatory, starttime=output_gap[0], endtime=output_gap[1], channels=input_channels) if not algorithm.can_produce_data(starttime=output_gap[0], endtime=output_gap[1], stream=input_timeseries): continue # check for fillable gap at start if output_gap[0] == options.starttime: # found fillable gap at start, recurse to previous interval interval = options.endtime - options.starttime starttime = options.starttime - interval - delta endtime = options.starttime - delta options.starttime = starttime options.endtime = endtime self.run_as_update(options) # fill gap options.starttime = output_gap[0] options.endtime = output_gap[1] self.run(options)
def test_get_stream_gaps():
    """geomag.TimeseriesUtility_test.test_get_stream_gaps

    confirms that gaps are found in a stream
    """
    stream = Stream([
        __create_trace('H', [numpy.nan, 1, 1, numpy.nan, numpy.nan]),
        __create_trace('Z', [0, 0, 0, 1, 1, 1])
    ])
    for trace in stream:
        # set time of first sample
        trace.stats.starttime = UTCDateTime('2015-01-01T00:00:00Z')
        # set sample rate to 1 second
        trace.stats.delta = 1
    # find gaps
    gaps = TimeseriesUtility.get_stream_gaps(stream)
    assert_equals(len(gaps['H']), 2)
    # gap at start of H
    gap = gaps['H'][0]
    assert_equals(gap[0], UTCDateTime('2015-01-01T00:00:00Z'))
    assert_equals(gap[1], UTCDateTime('2015-01-01T00:00:00Z'))
    # gap at end of H
    gap = gaps['H'][1]
    assert_equals(gap[0], UTCDateTime('2015-01-01T00:00:03Z'))
    assert_equals(gap[1], UTCDateTime('2015-01-01T00:00:04Z'))
    # no gaps in Z channel
    assert_equals(len(gaps['Z']), 0)
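# These tests rely on a module-level __create_trace helper that is not
# shown in this excerpt.  A minimal sketch that would satisfy them
# (assumptions: obspy's Trace accepts a numpy array plus a header dict;
# the tests set starttime and delta themselves afterwards):
import numpy
from obspy.core import Trace

def __create_trace(channel, data):
    """Build a Trace for `channel` from a list of sample values."""
    return Trace(
            data=numpy.array(data, dtype=numpy.float64),
            header={'channel': channel})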
def can_produce_data(self, starttime, endtime, stream):
    """Can produce data

    Parameters
    ----------
    starttime: UTCDateTime
        start time of requested output
    endtime : UTCDateTime
        end time of requested output
    stream: obspy.core.Stream
        The input stream we want to make certain has data for the
        algorithm
    """
    input_gaps = TimeseriesUtility.get_merged_gaps(
            TimeseriesUtility.get_stream_gaps(stream))
    for input_gap in input_gaps:
        # Check for gaps that include the entire range
        if (starttime >= input_gap[0] and
                starttime <= input_gap[1] and
                endtime < input_gap[2]):
            return False
    return True
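# Worked example of the gap-triple convention used above: a merged gap is
# [first missing sample, last missing sample, next available sample], so a
# request is unproducible only when it starts inside the gap AND ends
# before the next available sample.  The times below are illustrative.
from obspy.core import UTCDateTime

gap = [UTCDateTime('2015-01-01T00:00:00Z'),   # gap start
       UTCDateTime('2015-01-01T00:10:00Z'),   # gap end
       UTCDateTime('2015-01-01T00:11:00Z')]   # next available sample
starttime = UTCDateTime('2015-01-01T00:02:00Z')
endtime = UTCDateTime('2015-01-01T00:08:00Z')
# same test as the loop body above; True means the gap swallows the request
assert (starttime >= gap[0] and starttime <= gap[1] and endtime < gap[2])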
def test_get_trace_gaps():
    """geomag.TimeseriesUtility_test.test_get_trace_gaps

    confirm that gaps are found in a trace
    """
    trace = __create_trace('H', [1, 1, numpy.nan, numpy.nan, 0, 1])
    # set time of first sample
    trace.stats.starttime = UTCDateTime('2015-01-01T00:00:00Z')
    # set sample rate to 1 minute
    trace.stats.delta = 60
    # find gap
    gaps = TimeseriesUtility.get_trace_gaps(trace)
    assert_equals(len(gaps), 1)
    gap = gaps[0]
    assert_equals(gap[0], UTCDateTime('2015-01-01T00:02:00Z'))
    assert_equals(gap[1], UTCDateTime('2015-01-01T00:03:00Z'))
def test_get_merged_gaps():
    """geomag.TimeseriesUtility_test.test_get_merged_gaps

    confirm that gaps are merged
    """
    merged = TimeseriesUtility.get_merged_gaps({
        'H': [
            # gap for 2 seconds, that starts after next gap
            [
                UTCDateTime('2015-01-01T00:00:01Z'),
                UTCDateTime('2015-01-01T00:00:03Z'),
                UTCDateTime('2015-01-01T00:00:04Z')
            ]
        ],
        # gap for 1 second, that occurs before previous gap
        'Z': [
            [
                UTCDateTime('2015-01-01T00:00:00Z'),
                UTCDateTime('2015-01-01T00:00:00Z'),
                UTCDateTime('2015-01-01T00:00:01Z')
            ],
            [
                UTCDateTime('2015-01-01T00:00:05Z'),
                UTCDateTime('2015-01-01T00:00:07Z'),
                UTCDateTime('2015-01-01T00:00:08Z')
            ],
        ]
    })
    assert_equals(len(merged), 2)
    # first gap combines H and Z gaps
    gap = merged[0]
    assert_equals(gap[0], UTCDateTime('2015-01-01T00:00:00Z'))
    assert_equals(gap[1], UTCDateTime('2015-01-01T00:00:03Z'))
    # second gap is second Z gap
    gap = merged[1]
    assert_equals(gap[0], UTCDateTime('2015-01-01T00:00:05Z'))
    assert_equals(gap[1], UTCDateTime('2015-01-01T00:00:07Z'))
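# Based on the expectations in this test, a hypothetical standalone sketch
# of the merge: flatten the gaps from all channels, sort by start time,
# then combine any gap that starts at or before the previous gap's
# next-sample time (gap[2]).  Illustrative only, not the library code.
def _merge_gaps(gaps):
    merged = []
    for gap in sorted(gaps, key=lambda g: g[0]):
        if merged and gap[0] <= merged[-1][2]:
            # overlapping or adjacent: extend the previous merged gap
            merged[-1][1] = max(merged[-1][1], gap[1])
            merged[-1][2] = max(merged[-1][2], gap[2])
        else:
            merged.append(list(gap))
    return merged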
def run_as_update(self, options, update_count=0):
    """Updates data.

    Parameters
    ----------
    options: dictionary
        The dictionary of all the command line arguments. Could in theory
        contain other options passed in by the controller.
    update_count: integer
        number of update intervals processed so far, used to enforce
        options.update_limit during recursion.

    Notes
    -----
    Finds gaps in the target data, and if there's new data in the input
    source, calls run with the start/end time of a given gap to fill in.

    It checks the start of the target data, and if it's missing, and
    there's new data available, it backs up the starttime/endtime, and
    recursively calls itself, to check the previous period, to see if
    new data is available there as well. Calls run for each new period,
    oldest to newest.
    """
    # If an update_limit is set, make certain we don't step past it.
    if options.update_limit != 0:
        if update_count >= options.update_limit:
            return
    print >> sys.stderr, 'checking gaps', \
            options.starttime, options.endtime
    algorithm = self._algorithm
    input_channels = options.inchannels or \
            algorithm.get_input_channels()
    output_channels = options.outchannels or \
            algorithm.get_output_channels()
    # request output to see what has already been generated
    output_timeseries = self._get_output_timeseries(
            observatory=options.observatory,
            starttime=options.starttime,
            endtime=options.endtime,
            channels=output_channels)
    if len(output_timeseries) > 0:
        # find gaps in output, so they can be updated
        output_gaps = TimeseriesUtility.get_merged_gaps(
                TimeseriesUtility.get_stream_gaps(output_timeseries))
    else:
        output_gaps = [[
            options.starttime,
            options.endtime,
            # next sample time not used
            None
        ]]
    for output_gap in output_gaps:
        input_timeseries = self._get_input_timeseries(
                observatory=options.observatory,
                starttime=output_gap[0],
                endtime=output_gap[1],
                channels=input_channels)
        if not algorithm.can_produce_data(
                starttime=output_gap[0],
                endtime=output_gap[1],
                stream=input_timeseries):
            continue
        # check for fillable gap at start
        if output_gap[0] == options.starttime:
            # found fillable gap at start, recurse to previous interval
            interval = options.endtime - options.starttime
            starttime = options.starttime - interval
            endtime = options.starttime - 1
            options.starttime = starttime
            options.endtime = endtime
            self.run_as_update(options, update_count + 1)
        # fill gap
        options.starttime = output_gap[0]
        options.endtime = output_gap[1]
        print >> sys.stderr, 'processing', \
                options.starttime, options.endtime
        self.run(options, input_timeseries)
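# Sketch of how a caller might bound the recursion above.  The Options
# class is a stand-in for the parsed command line arguments; only the
# attribute names used by run_as_update are taken from the code, and the
# observatory/times are illustrative.
from obspy.core import UTCDateTime

class Options(object):
    pass

options = Options()
options.observatory = 'BOU'
options.starttime = UTCDateTime('2015-01-02T00:00:00Z')
options.endtime = UTCDateTime('2015-01-03T00:00:00Z')
options.inchannels = None    # fall back to algorithm defaults
options.outchannels = None
options.update_limit = 10    # look back at most 10 intervals; 0 = no limit
# controller.run_as_update(options)  # controller construction not shown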
def put_timeseries(self, timeseries, starttime=None, endtime=None,
        channels=None, type=None, interval=None):
    """Store timeseries data.

    Parameters
    ----------
    timeseries : obspy.core.Stream
        stream containing traces to store.
    starttime : UTCDateTime
        time of first sample in timeseries to store.
        uses first sample if unspecified.
    endtime : UTCDateTime
        time of last sample in timeseries to store.
        uses last sample if unspecified.
    channels : array_like
        list of channels to store, optional.
        uses default if unspecified.
    type : {'definitive', 'provisional', 'quasi-definitive', 'variation'}
        data type, optional.
        uses default if unspecified.
    interval : {'daily', 'hourly', 'minute', 'monthly', 'second'}
        data interval, optional.
        uses default if unspecified.

    Raises
    ------
    TimeseriesFactoryException
        if any errors occur.
    """
    if len(timeseries) == 0:
        # no data to put
        return
    if not self.urlTemplate.startswith('file://'):
        raise TimeseriesFactoryException('Only file urls are supported')
    channels = channels or self.channels
    type = type or self.type
    interval = interval or self.interval
    stats = timeseries[0].stats
    delta = stats.delta
    observatory = stats.station
    starttime = starttime or stats.starttime
    endtime = endtime or stats.endtime
    urlIntervals = Util.get_intervals(
            starttime=starttime,
            endtime=endtime,
            size=self.urlInterval)
    for urlInterval in urlIntervals:
        url = self._get_url(
                observatory=observatory,
                date=urlInterval['start'],
                type=type,
                interval=interval,
                channels=channels)
        url_data = timeseries.slice(
                starttime=urlInterval['start'],
                # subtract delta to omit the sample at end: `[start, end)`
                endtime=(urlInterval['end'] - delta))
        url_file = Util.get_file_from_url(url, createParentDirectory=True)
        # existing data file, merge new data into existing
        if os.path.isfile(url_file):
            try:
                existing_data = Util.read_file(url_file)
                existing_data = self.parse_string(
                        existing_data,
                        observatory=url_data[0].stats.station,
                        type=type,
                        interval=interval,
                        channels=channels)
                # TODO: make parse_string return the correct location code
                for trace in existing_data:
                    # make location codes match, just in case
                    new_trace = url_data.select(
                            network=trace.stats.network,
                            station=trace.stats.station,
                            channel=trace.stats.channel)[0]
                    trace.stats.location = new_trace.stats.location
                url_data = TimeseriesUtility.merge_streams(
                        existing_data, url_data)
            except IOError:
                # no data yet
                pass
            except NotImplementedError:
                # factory only supports output
                pass
        with open(url_file, 'wb') as fh:
            try:
                self.write_file(fh, url_data, channels)
            except NotImplementedError:
                raise NotImplementedError(
                        '"put_timeseries" not implemented')
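# Worked example of the half-open slice above: subtracting one sample
# delta from the interval end keeps the boundary sample out of this
# file, so it becomes the first sample of the next urlInterval instead.
# The interval size and sample rate below are illustrative.
from obspy.core import UTCDateTime

interval_start = UTCDateTime('2015-01-01T00:00:00Z')
interval_end = UTCDateTime('2015-01-02T00:00:00Z')
delta = 60.0  # one-minute data
last_sample = interval_end - delta
# timeseries.slice(starttime=interval_start, endtime=last_sample) keeps
# samples through 2015-01-01T23:59:00Z; the 2015-01-02T00:00:00Z sample
# lands in the next day's file.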