def _read_timeseries_from_cache_file(self):
    """Load the cached time series from self.filename.

    A missing or unparsable cache file is not an error: in either case an
    empty HTimeseries is returned and the cache gets rebuilt from scratch.
    """
    try:
        cache_file = open(self.filename, newline="\n")
    except FileNotFoundError:
        return HTimeseries()
    with cache_file:
        try:
            return HTimeseries(cache_file)
        except ValueError:
            # Corrupted file; fall back to an empty time series.
            return HTimeseries()
def _get_hts_object(timeseries_id, start_date):
    """Return the appropriate test fixture for timeseries_id/start_date.

    With no start_date (or the sentinel 0001-01-01 00:01) the "top" part of
    the fixture is returned; otherwise start_date must be exactly one minute
    after the top part ends, and the "bottom" part is returned.
    """
    top = HTimeseries(StringIO(test_timeseries["{}_top".format(timeseries_id)]))
    if start_date is None or start_date == dt.datetime(1, 1, 1, 0, 1):
        return top
    expected_start = top.data.index[-1] + dt.timedelta(minutes=1)
    assert start_date == expected_start
    bottom = HTimeseries(
        StringIO(test_timeseries["{}_bottom".format(timeseries_id)])
    )
    return bottom
def test_daily(self):
    """End-to-end check: the CLI app computes daily evaporation correctly."""
    self.setup_daily_input_files()
    self.setup_config_file("D")

    # Verify the output file doesn't exist yet
    result_filename = os.path.join(self.tempdir, "evaporation.hts")
    assert not os.path.exists(result_filename)

    # Execute
    cli.App(self.config_file).run()

    # Check that it has created a file and that the file is correct
    with open(result_filename) as f:
        t = HTimeseries(f)
    expected_result = pd.DataFrame(
        data={"value": [3.9], "flags": [""]},
        columns=["value", "flags"],
        index=[dt.datetime(2014, 7, 6)],
    )
    expected_result.index.name = "date"
    # check_less_precise was deprecated in pandas 1.1 and removed in 2.0;
    # check_less_precise=1 corresponds to rtol=atol=0.5e-1.
    pd.testing.assert_frame_equal(
        t.data, expected_result, rtol=0.5e-1, atol=0.5e-1
    )
def _get_input_timeseries_for_var(self, var):
    """Read "<var>_prefix.hts" into self.input_timeseries[var].

    If the file does not exist, silently does nothing (the variable is
    simply left absent from self.input_timeseries).
    """
    prefix = getattr(self.config, var + "_prefix")
    filename = os.path.join(self.config.base_dir, prefix + ".hts")
    if os.path.exists(filename):
        with open(filename, "r") as f:
            self.input_timeseries[var] = HTimeseries(f)
def setUpTestData(cls):
    # Populate TimeseriesRecord once per test class via bulk_insert, using a
    # tiny two-row CSV fixture (second row has an empty value — presumably
    # to exercise null-value handling; confirm against the tests).
    cls._create_test_timeseries()
    ahtimeseries = HTimeseries(
        StringIO("2020-09-08 20:00,15.7,,\n2020-09-08 21:00,,\n")
    )
    models.TimeseriesRecord.bulk_insert(cls.timeseries, ahtimeseries)
    cls.timeseries_records = models.TimeseriesRecord.objects.all()
def test_write(self):
    """write() renders NaN values as empty fields and uses CRLF endings."""
    rows = [
        [parse_date("2005-08-23 18:53"), 93, ""],
        [parse_date("2005-08-24 19:52"), 108.7, ""],
        [parse_date("2005-08-25 23:59"), 28.3, "HEARTS SPADES"],
        [parse_date("2005-08-26 00:02"), float("NaN"), ""],
        [parse_date("2005-08-27 00:02"), float("NaN"), "DIAMONDS"],
    ]
    anp = np.array(rows)
    data = pd.DataFrame(
        anp[:, [1, 2]], index=anp[:, 0], columns=("value", "flags")
    )
    ts = HTimeseries(data=data)
    buf = StringIO()
    ts.write(buf)
    expected = textwrap.dedent("""\
        2005-08-23 18:53,93,\r
        2005-08-24 19:52,108.7,\r
        2005-08-25 23:59,28.3,HEARTS SPADES\r
        2005-08-26 00:02,,\r
        2005-08-27 00:02,,DIAMONDS\r
        """)
    self.assertEqual(buf.getvalue(), expected)
def setUp(self):
    """Build the reference 10-minute time series with full metadata."""
    data = pd.read_csv(
        StringIO(tenmin_test_timeseries),
        parse_dates=[0],
        usecols=["date", "value", "flags"],
        index_col=0,
        header=None,
        names=("date", "value", "flags"),
        dtype={"value": np.float64, "flags": str},
    ).asfreq("10min")  # "10T" alias is deprecated since pandas 2.2; "10min"
    # also matches the time_step string set below.
    self.reference_ts = HTimeseries(data=data)
    self.reference_ts.unit = "°C"
    self.reference_ts.title = "A test 10-min time series"
    self.reference_ts.precision = 1
    self.reference_ts.time_step = "10min"
    self.reference_ts.timezone = "EET (UTC+0200)"
    self.reference_ts.variable = "temperature"
    self.reference_ts.comment = ("This timeseries is extremely important\n"
                                 "because the comment that describes it\n"
                                 "spans five lines.\n\n"
                                 "These five lines form two paragraphs.")
    self.reference_ts.location = {
        "abscissa": 24.6789,
        "ordinate": 38.12345,
        "srid": 4326,
        "altitude": 219.22,
        "asrid": None,
    }
def _read_timeseries_from_stream(self, stream):
    """Parse an uploaded stream as an HTimeseries.

    Raises forms.ValidationError if the stream is not valid UTF-8.
    """
    try:
        return HTimeseries(stream)
    except UnicodeDecodeError as e:
        # Keep the string passed to _() constant: concatenating the dynamic
        # exception text inside _() produces a msgid that never matches the
        # translation catalog, so the message was never translated.
        raise forms.ValidationError(
            _("The file does not seem to be a valid UTF-8 file: ") + str(e)
        )
def _get_timeseries_if_file_is_up_to_date_else_none(self, dest):
    """Return the cached HTimeseries at dest if it covers all input files.

    Returns None as soon as any input file's date is missing from the
    cached series' index, signalling that the cache must be rebuilt.
    """
    with open(dest, "r", newline="") as f:
        ts = HTimeseries(f)
    for filename in self.filenames:
        # PEP 8 idiom: "x not in y" rather than "not x in y".
        if self.filename_format.get_date(filename) not in ts.data.index:
            return None
    return ts
def create_timeseries(self):
    """Create a station/variable/group/timeseries fixture with two records."""
    records = pd.DataFrame(
        index=[dt.datetime(2017, 11, 23, 17, 23), dt.datetime(2018, 11, 25, 1, 0)],
        data={"value": [1.0, 2.0], "flags": ["", ""]},
        columns=["value", "flags"],
    )
    self.htimeseries = HTimeseries()
    self.htimeseries.data = records
    self.station = mommy.make(
        models.Station,
        name="Komboti",
        geom=Point(x=21.00000, y=39.00000, srid=4326),
        original_srid=4326,
    )
    self.time_zone = mommy.make(models.TimeZone, code="EET", utc_offset=120)
    self.variable = models.Variable()
    with switch_language(self.variable, "en"):
        self.variable.descr = "Beauty"
        self.variable.save()
    self.timeseries_group = mommy.make(
        models.TimeseriesGroup,
        gentity=self.station,
        time_zone=self.time_zone,
        precision=2,
        variable=self.variable,
    )
    self.timeseries = mommy.make(
        models.Timeseries,
        type=models.Timeseries.RAW,
        timeseries_group=self.timeseries_group,
    )
    self.timeseries.set_data(self.htimeseries.data)
def _prepare_resulting_htimeseries_object(self):
    """Initialize self.pet with metadata for the evapotranspiration result."""
    pet = HTimeseries()
    pet.time_step = self.config.time_step
    pet.unit = "mm"
    pet.timezone = self.timezone
    pet.variable = "Potential Evapotranspiration"
    # Hourly series get two decimals, anything else one.
    pet.precision = 2 if self.config.time_step == "H" else 1
    pet.location = self.location
    self.pet = pet
def setUp(self):
    """Read the fixture restricted to the 11:30–11:55 window."""
    stream = StringIO(tenmin_test_timeseries)
    stream.seek(0)
    start = dt.datetime(2008, 2, 7, 11, 30)
    end = dt.datetime(2008, 2, 7, 11, 55)
    self.ts = HTimeseries(stream, start_date=start, end_date=end)
def test_read_csv_with_duplicates_raises_error(self):
    """Reading a CSV containing repeated timestamps must raise ValueError."""
    stream = StringIO(self.csv_with_duplicates)
    stream.seek(0)
    expected_msg = (
        "Can't read time series: the following timestamps appear more than once: "
        "2020-02-23 12:00:00, 2020-02-23 13:00:00")
    with self.assertRaisesRegex(ValueError, expected_msg):
        HTimeseries(stream)
def process_timeseries(self):
    """Regularize, aggregate and trim self.htimeseries.

    If regularization fails, the error is logged and an empty HTimeseries
    is returned instead of raising.
    """
    self.source_end_date = self.htimeseries.data.index[-1]
    try:
        regularized = self._regularize_time_series(self.htimeseries)
    except RegularizeError as e:
        logging.getLogger("enhydris.autoprocess").error(str(e))
        return HTimeseries()
    return self._trim_last_record_if_not_complete(
        self._aggregate_time_series(regularized)
    )
def _prepare_resulting_htimeseries_object(self):
    """Initialize self.pet with metadata for the evapotranspiration result."""
    self.pet = HTimeseries()
    step_minutes = int(self.config.step.total_seconds() / 60)
    # Time step is expressed as "minutes,months".
    self.pet.time_step = "{},0".format(step_minutes)
    self.pet.unit = "mm"
    self.pet.timezone = self.timezone
    self.pet.variable = "Potential Evapotranspiration"
    # Hourly series get two decimals, anything else one.
    is_hourly = self.config.step == dt.timedelta(hours=1)
    self.pet.precision = 2 if is_hourly else 1
    self.pet.location = self.location
def _upload_all_new_data(self):
    # Push to the server, for each target time series, any logger records
    # newer than the server's current end date for that series.
    station_id = self._meteologger_storage.station_id
    # Series are processed in ascending order of their current end date —
    # NOTE(review): presumably so a mid-run failure leaves the remaining
    # gap contiguous; confirm against the uploader's design.
    sorted_ts_end_dates = sorted(self._ts_end_dates.items(), key=lambda x: x[1])
    for cts_id, ts_end_date in sorted_ts_end_dates:
        new_data = self._meteologger_storage.get_recent_data(
            cts_id.timeseries_group_id, ts_end_date)
        # Skip the POST entirely when there is nothing new.
        if len(new_data):
            self.client.post_tsdata(station_id, *cts_id, HTimeseries(new_data))
def setUp(self):
    """Attach a one-record source series, with an empty time step, to the
    aggregation under test."""
    source = pd.DataFrame(
        data={"value": [42], "flags": [""]},
        columns=["value", "flags"],
        index=[dt.datetime(2019, 5, 21, 11, 20)],
    )
    self.aggregation._htimeseries = HTimeseries(source)
    self.aggregation._htimeseries.time_step = ""
def test_execute(self):
    """A 10-min/7.0 rate-of-change threshold yields the expected result."""
    self.roc_check = mommy.make(RateOfChangeCheck)
    mommy.make(
        RateOfChangeThreshold,
        rate_of_change_check=self.roc_check,
        delta_t="10min",
        allowed_diff=7.0,
    )
    checks = self.roc_check.checks
    checks._htimeseries = HTimeseries(self.source_timeseries)
    result = checks.process_timeseries()
    pd.testing.assert_frame_equal(result, self.expected_result)
def get_data(self, start_date=None, end_date=None):
    """Return this time series' data as an HTimeseries, optionally sliced.

    The full record set is cached under "timeseries_data_{id}" and produced
    by _get_all_data_as_pd. start_date/end_date, when given, are converted
    to the series' own time zone and stripped of tzinfo before slicing,
    because the cached index holds naive local timestamps.
    """
    data = cache.get_or_set(f"timeseries_data_{self.id}", self._get_all_data_as_pd)
    if start_date:
        start_date = start_date.astimezone(self.time_zone.as_tzinfo)
        start_date = start_date.replace(tzinfo=None)
    if end_date:
        end_date = end_date.astimezone(self.time_zone.as_tzinfo)
        end_date = end_date.replace(tzinfo=None)
    # .loc with None bound(s) means "unbounded" on that side.
    data = data.loc[start_date:end_date]
    result = HTimeseries(data)
    self._set_extra_timeseries_properties(result)
    return result
def test_file_is_not_recreated(self):
    """A second get_cached() on an up-to-date file must not rewrite it."""
    first = hspatial.PointTimeseries(self.point, prefix=self.prefix)
    first.get_cached(self.dest)

    # Make existing file read-only
    os.chmod(self.dest, S_IREAD | S_IRGRP | S_IROTH)

    # Try again—it shouldn't try to write, therefore it shouldn't raise exception
    second = hspatial.PointTimeseries(self.point, prefix=self.prefix)
    second.get_cached(self.dest)
    with open(self.dest, "r", newline="\n") as f:
        self._check_against_expected(HTimeseries(f))
def test_execute(self):
    """Range check with bounds 2–5 (soft 3–4) yields the expected result."""
    self.range_check = mommy.make(
        RangeCheck,
        lower_bound=2,
        upper_bound=5,
        soft_lower_bound=3,
        soft_upper_bound=4,
    )
    checks = self.range_check.checks
    checks._htimeseries = HTimeseries(self.source_timeseries)
    result = checks.process_timeseries()
    pd.testing.assert_frame_equal(result, self.expected_result)
def test_execute(self): application = cli.App(self.config_file) # Check that the two files don't exist yet self.assertFalse(os.path.exists(os.path.join(self.tempdir, "file1"))) self.assertFalse(os.path.exists(os.path.join(self.tempdir, "file2"))) # Execute the application application.run() # Check that it has created two files self.assertTrue(os.path.exists(os.path.join(self.tempdir, "file1"))) self.assertTrue(os.path.exists(os.path.join(self.tempdir, "file2"))) # Check that the files are what they should be with open("file1", newline="\n") as f: ts1_before = HTimeseries(f) self.assertEqual(ts1_before.time_step, "D") c = StringIO() ts1_before.write(c) self.assertEqual(c.getvalue().replace("\r", ""), self.timeseries1_top) with open("file2", newline="\n") as f: ts2_before = HTimeseries(f) self.assertEqual(ts2_before.time_step, "D") c = StringIO() ts2_before.write(c) self.assertEqual(c.getvalue().replace("\r", ""), self.timeseries2_top) # Append a record to the database for each timeseries self.api_client.post_tsdata( self.station1_id, self.timeseries1_id, HTimeseries(StringIO(self.timeseries1_bottom)), ) self.api_client.post_tsdata( self.station2_id, self.timeseries2_id, HTimeseries(StringIO(self.timeseries2_bottom)), ) # Execute the application again application.run() # Check that the files are what they should be with open("file1", newline="\n") as f: ts1_after = HTimeseries(f) self.assertEqual(ts1_after.time_step, "D") c = StringIO() ts1_after.write(c) self.assertEqual(c.getvalue().replace("\r", ""), self.test_timeseries1) with open("file2", newline="\n") as f: ts2_after = HTimeseries(f) self.assertEqual(ts2_after.time_step, "D") c = StringIO() ts2_after.write(c) self.assertEqual(c.getvalue().replace("\r", ""), self.test_timeseries2) # Check that the time series comments are the same before and after self.assertEqual(ts1_before.comment, ts1_after.comment) self.assertEqual(ts2_before.comment, ts2_after.comment)
def test_execute(self):
    """Curve interpolation across two periods produces the expected frame."""
    station = mommy.make(Station)
    self.curve_interpolation = mommy.make(
        CurveInterpolation,
        timeseries_group__gentity=station,
        target_timeseries_group__gentity=station,
    )
    self._setup_period1()
    self._setup_period2()
    ci = self.curve_interpolation
    ci._htimeseries = HTimeseries(self.source_timeseries)
    result = ci.process_timeseries()
    pd.testing.assert_frame_equal(result, self.expected_result)
def _execute(self, max_missing):
    """Run an hourly "sum" aggregation with the given max_missing and
    return the resulting DataFrame."""
    station = mommy.make(Station)
    self.aggregation = mommy.make(
        Aggregation,
        timeseries_group__gentity=station,
        timeseries_group__variable__descr="Hello",
        target_time_step="H",
        method="sum",
        max_missing=max_missing,
        resulting_timestamp_offset="1min",
    )
    source = HTimeseries(self.source_timeseries)
    source.time_step = "10min"
    self.aggregation._htimeseries = source
    return self.aggregation.process_timeseries().data
def get(self):
    """Build a time series by sampling self.point in every raster file.

    Each file's TIMESTAMP metadata item (ISO 8601, parsed as naive)
    becomes an index entry whose "value" is the raster value at the point
    and whose "flags" is empty.
    """
    result = HTimeseries()
    for filename in self.filenames:
        f = gdal.Open(filename)
        try:
            isostring = f.GetMetadata()["TIMESTAMP"]
            timestamp = iso8601.parse_date(isostring, default_timezone=None)
            value = extract_point_from_raster(self.point, f)
            result.data.loc[timestamp, "value"] = value
            result.data.loc[timestamp, "flags"] = ""
        finally:
            # Dropping the reference is how a GDAL dataset is closed.
            f = None
    # Input files may arrive in any order; make the index chronological.
    result.data = result.data.sort_index()
    return result
def _time_step(self):
    """Return the time step common to all configured time series.

    Raises click.ClickException if the time series do not all have the
    same time step. Returns None if self.config.files is empty.
    """
    time_step = None
    for filename in self.config.files:
        with open(filename) as f:
            # NOTE(review): start_date "0001-01-01 00:00" keeps essentially
            # all records — presumably passed so HTimeseries skips nothing
            # while still reading the metadata header; confirm intent.
            t = HTimeseries(f, start_date="0001-01-01 00:00")
        item_time_step = t.time_step
        if time_step and (item_time_step != time_step):
            raise click.ClickException(
                "Not all time series have the same step")
        time_step = item_time_step
    return time_step
def test_execute(self):
    """Range check bound to a station yields the expected result frame."""
    station = mommy.make(Station)
    self.range_check = mommy.make(
        RangeCheck,
        lower_bound=2,
        upper_bound=5,
        soft_lower_bound=3,
        soft_upper_bound=4,
        station=station,
        source_timeseries__gentity=station,
        target_timeseries__gentity=station,
    )
    rc = self.range_check
    rc.htimeseries = HTimeseries(self.source_timeseries)
    result = rc.process_timeseries()
    pd.testing.assert_frame_equal(result, self.expected_result)
def create_timeseries(self):
    """Create a minimal station + timeseries fixture with two records."""
    records = pd.DataFrame(
        index=[datetime(2017, 11, 23, 17, 23), datetime(2018, 11, 25, 1, 0)],
        data={"value": [1.0, 2.0], "flags": ["", ""]},
        columns=["value", "flags"],
    )
    hts = HTimeseries()
    hts.data = records
    self.htimeseries = hts
    self.station = mommy.make(models.Station)
    self.timeseries = mommy.make(
        models.Timeseries,
        id=42,
        gentity=self.station,
        time_zone__utc_offset=120,
        precision=2,
    )
def h_integrate(mask, stations_layer, date, output_filename_prefix, date_fmt,
                funct, kwargs):
    """Spatially integrate station values at `date` into a GeoTIFF.

    Reads the value at `date` from each station's time series file, writes
    it to a "value" field on stations_layer, and runs `funct` (with
    `kwargs`) over the mask raster, producing
    "<output_filename_prefix>-<date>.tif". Does nothing if the output is
    already up to date or if no station has a value at `date`.
    """
    # Make the formatted date safe for use in a filename.
    date_fmt_for_filename = (
        date.strftime(date_fmt).replace(" ", "-").replace(":", "-"))
    # Bug fix: the formatted string was previously passed through
    # strftime() a second time; it is already final (and would be corrupted
    # if it happened to contain a "%" directive).
    output_filename = "{}-{}.tif".format(
        output_filename_prefix, date_fmt_for_filename)
    if not _needs_calculation(output_filename, date, stations_layer):
        return

    # Read the time series values and add the 'value' attribute to
    # stations_layer
    stations_layer.CreateField(ogr.FieldDefn("value", ogr.OFTReal))
    input_files = []
    stations_layer.ResetReading()
    for station in stations_layer:
        filename = station.GetField("filename")
        with open(filename, newline="\n") as f:
            t = HTimeseries(f)
        try:
            # The series index holds naive timestamps.
            value = t.data.loc[date.replace(tzinfo=None), "value"]
        except KeyError:
            value = np.nan
        station.SetField("value", value)
        if not isnan(value):
            input_files.append(filename)
        stations_layer.SetFeature(station)
    if not input_files:
        return

    # Create destination data source
    output = gdal.GetDriverByName("GTiff").Create(
        output_filename, mask.RasterXSize, mask.RasterYSize, 1,
        gdal.GDT_Float32)
    output.SetMetadataItem("TIMESTAMP", date.strftime(date_fmt))
    output.SetMetadataItem("INPUT_FILES", "\n".join(input_files))
    try:
        # Set geotransform and projection in the output data source
        output.SetGeoTransform(mask.GetGeoTransform())
        output.SetProjection(mask.GetProjection())
        # Do the integration
        integrate(mask, stations_layer, output.GetRasterBand(1), funct, kwargs)
    finally:
        # Close the dataset
        output = None
def _get_all_data_as_pd(self):
    """Fetch all records of this time series from the database as a DataFrame.

    PostgreSQL renders the records to CSV text (in the series' local time
    zone) with STRING_AGG, and HTimeseries parses that text.
    """
    tzoffsetstring = self._get_tzoffsetstring_for_pg()
    with connection.cursor() as cursor:
        cursor.execute(
            """
            SELECT STRING_AGG(
                TO_CHAR(timestamp at time zone %s, 'YYYY-MM-DD HH24:MI')
                    || ',' || value || ',' || flags,
                E'\n'
                ORDER BY timestamp
            ) || E'\n'
            FROM enhydris_timeseriesrecord
            WHERE timeseries_id=%s;
            """,
            [tzoffsetstring, self.id],
        )
        aggregated = cursor.fetchone()[0]
    # STRING_AGG returns NULL when the series has no records;
    # StringIO(None) would raise TypeError, so substitute empty CSV text.
    return HTimeseries(StringIO(aggregated or "")).data