示例#1
0
 def _read_timeseries_from_cache_file(self):
     """Load the time series stored in ``self.filename``.

     Returns an empty ``HTimeseries`` when the file does not exist or when
     reading it raises ``ValueError``.
     """
     try:
         with open(self.filename, newline="\n") as cache_file:
             cached = HTimeseries(cache_file)
     except (FileNotFoundError, ValueError):
         # A missing or unreadable cache is not fatal: start from scratch.
         return HTimeseries()
     else:
         return cached
示例#2
0
def _get_hts_object(timeseries_id, start_date):
    """Return the "top" or the "bottom" test time series for *timeseries_id*.

    The "top" series is returned when *start_date* is None or equals
    0001-01-01 00:01. Otherwise *start_date* must be exactly one minute after
    the last timestamp of the "top" series, and the "bottom" series is
    returned.
    """
    top = HTimeseries(StringIO(test_timeseries[f"{timeseries_id}_top"]))
    wants_top = start_date is None or start_date == dt.datetime(1, 1, 1, 0, 1)
    if wants_top:
        return top

    # The caller is expected to ask for the data that follows the top part.
    expected_start = top.data.index[-1] + dt.timedelta(minutes=1)
    assert start_date == expected_start
    return HTimeseries(StringIO(test_timeseries[f"{timeseries_id}_bottom"]))
示例#3
0
    def test_daily(self):
        """Run the app on daily input and check the single-row output file."""
        self.setup_daily_input_files()
        self.setup_config_file("D")

        # Precondition: the output file must not be there before the run
        output_path = os.path.join(self.tempdir, "evaporation.hts")
        assert not os.path.exists(output_path)

        # Run the application
        cli.App(self.config_file).run()

        # The run must have produced the file, with exactly the expected row
        with open(output_path) as output_file:
            actual = HTimeseries(output_file)
        expected = pd.DataFrame(
            data={"value": [3.9], "flags": [""]},
            columns=["value", "flags"],
            index=[dt.datetime(2014, 7, 6)],
        )
        expected.index.name = "date"
        # NOTE(review): `check_less_precise` appears to be deprecated in newer
        # pandas releases -- confirm against the pinned pandas version.
        pd.testing.assert_frame_equal(
            actual.data, expected, check_less_precise=1
        )
示例#4
0
 def _get_input_timeseries_for_var(self, var):
     """Load the input time series for *var*, if its file exists.

     The file is ``<var>_prefix`` (read from ``self.config``) plus ``.hts``
     inside ``self.config.base_dir``. When the file is absent nothing is
     stored and None is returned.
     """
     base_dir = self.config.base_dir
     basename = getattr(self.config, var + "_prefix") + ".hts"
     path = os.path.join(base_dir, basename)
     if os.path.exists(path):
         with open(path, "r") as stream:
             self.input_timeseries[var] = HTimeseries(stream)
示例#5
0
 def setUpTestData(cls):
     """Create the test time series and insert two records into it."""
     cls._create_test_timeseries()
     csv_rows = "2020-09-08 20:00,15.7,,\n2020-09-08 21:00,,\n"
     records = HTimeseries(StringIO(csv_rows))
     models.TimeseriesRecord.bulk_insert(cls.timeseries, records)
     cls.timeseries_records = models.TimeseriesRecord.objects.all()
示例#6
0
 def test_write(self):
     """Writing a time series emits one CRLF-terminated CSV row per record."""
     rows = np.array([
         [parse_date("2005-08-23 18:53"), 93, ""],
         [parse_date("2005-08-24 19:52"), 108.7, ""],
         [parse_date("2005-08-25 23:59"), 28.3, "HEARTS SPADES"],
         [parse_date("2005-08-26 00:02"), float("NaN"), ""],
         [parse_date("2005-08-27 00:02"), float("NaN"), "DIAMONDS"],
     ])
     # Column 0 holds the timestamps; columns 1 and 2 hold value and flags.
     frame = pd.DataFrame(
         rows[:, [1, 2]], index=rows[:, 0], columns=("value", "flags")
     )
     buffer = StringIO()
     HTimeseries(data=frame).write(buffer)
     expected = textwrap.dedent("""\
             2005-08-23 18:53,93,\r
             2005-08-24 19:52,108.7,\r
             2005-08-25 23:59,28.3,HEARTS SPADES\r
             2005-08-26 00:02,,\r
             2005-08-27 00:02,,DIAMONDS\r
             """)
     self.assertEqual(buffer.getvalue(), expected)
示例#7
0
 def setUp(self):
     """Build the reference ten-minute time series, including its metadata.

     The data come from ``tenmin_test_timeseries`` (headerless CSV with
     date, value and flags columns) and are put on a ten-minute frequency;
     the metadata attributes are then filled in one by one.
     """
     data = pd.read_csv(
         StringIO(tenmin_test_timeseries),
         parse_dates=[0],
         usecols=["date", "value", "flags"],
         index_col=0,
         header=None,
         names=("date", "value", "flags"),
         dtype={
             "value": np.float64,
             "flags": str
         },
         # Use the "min" alias: the equivalent "T" spelling is deprecated in
         # recent pandas releases, while "min" works on old and new ones.
     ).asfreq("10min")
     self.reference_ts = HTimeseries(data=data)
     self.reference_ts.unit = "°C"
     self.reference_ts.title = "A test 10-min time series"
     self.reference_ts.precision = 1
     self.reference_ts.time_step = "10min"
     self.reference_ts.timezone = "EET (UTC+0200)"
     self.reference_ts.variable = "temperature"
     self.reference_ts.comment = ("This timeseries is extremely important\n"
                                  "because the comment that describes it\n"
                                  "spans five lines.\n\n"
                                  "These five lines form two paragraphs.")
     self.reference_ts.location = {
         "abscissa": 24.6789,
         "ordinate": 38.12345,
         "srid": 4326,
         "altitude": 219.22,
         "asrid": None,
     }
示例#8
0
 def _read_timeseries_from_stream(self, stream):
     """Parse *stream* into an ``HTimeseries``.

     Raises:
         forms.ValidationError: if reading the stream raises
             ``UnicodeDecodeError``; the message ends with the decoder's
             own error text.
     """
     try:
         return HTimeseries(stream)
     except UnicodeDecodeError as e:
         # Only the constant part goes through the translation function.
         # Passing the concatenated text would look up a message that
         # contains the (variable) error details and so never matches.
         raise forms.ValidationError(
             _("The file does not seem to be a valid UTF-8 file: ") + str(e)
         ) from e
示例#9
0
 def _get_timeseries_if_file_is_up_to_date_else_none(self, dest):
     """Return the time series stored in *dest* if it covers every file.

     The series counts as up to date when the date derived from each name
     in ``self.filenames`` is present in its index; otherwise None is
     returned.
     """
     with open(dest, "r", newline="") as stored_file:
         stored = HTimeseries(stored_file)
     covers_all_files = all(
         self.filename_format.get_date(name) in stored.data.index
         for name in self.filenames
     )
     return stored if covers_all_files else None
示例#10
0
 def create_timeseries(self):
     """Create a two-record time series plus the database objects it needs.

     Sets ``htimeseries``, ``station``, ``time_zone``, ``variable``,
     ``timeseries_group`` and ``timeseries`` on the instance, then stores
     the two records through ``timeseries.set_data``.
     """
     timestamps = [
         dt.datetime(2017, 11, 23, 17, 23),
         dt.datetime(2018, 11, 25, 1, 0),
     ]
     self.htimeseries = HTimeseries()
     self.htimeseries.data = pd.DataFrame(
         index=timestamps,
         data={"value": [1.0, 2.0], "flags": ["", ""]},
         columns=["value", "flags"],
     )

     station_location = Point(x=21.00000, y=39.00000, srid=4326)
     self.station = mommy.make(
         models.Station,
         name="Komboti",
         geom=station_location,
         original_srid=4326,
     )
     self.time_zone = mommy.make(models.TimeZone, code="EET", utc_offset=120)

     # The description is set while the variable is switched to English.
     variable = models.Variable()
     self.variable = variable
     with switch_language(variable, "en"):
         variable.descr = "Beauty"
         variable.save()

     self.timeseries_group = mommy.make(
         models.TimeseriesGroup,
         gentity=self.station,
         time_zone=self.time_zone,
         precision=2,
         variable=variable,
     )
     self.timeseries = mommy.make(
         models.Timeseries,
         type=models.Timeseries.RAW,
         timeseries_group=self.timeseries_group,
     )
     self.timeseries.set_data(self.htimeseries.data)
示例#11
0
 def _prepare_resulting_htimeseries_object(self):
     """Create ``self.pet`` as an empty series and fill in its metadata.

     The precision is 2 when the configured time step is "H", else 1.
     """
     self.pet = HTimeseries()
     pet = self.pet
     is_hourly = self.config.time_step == "H"
     pet.time_step = self.config.time_step
     pet.unit = "mm"
     pet.timezone = self.timezone
     pet.variable = "Potential Evapotranspiration"
     pet.precision = 2 if is_hourly else 1
     pet.location = self.location
示例#12
0
 def setUp(self):
     """Read the ten-minute test series limited to a start and end date."""
     stream = StringIO(tenmin_test_timeseries)
     stream.seek(0)
     date_window = {
         "start_date": dt.datetime(2008, 2, 7, 11, 30),
         "end_date": dt.datetime(2008, 2, 7, 11, 55),
     }
     self.ts = HTimeseries(stream, **date_window)
示例#13
0
 def test_read_csv_with_duplicates_raises_error(self):
     """Reading CSV with repeated timestamps raises a descriptive ValueError."""
     stream = StringIO(self.csv_with_duplicates)
     stream.seek(0)
     expected_message = (
         "Can't read time series: the following timestamps appear more than once: "
         "2020-02-23 12:00:00, 2020-02-23 13:00:00"
     )
     with self.assertRaisesRegex(ValueError, expected_message):
         HTimeseries(stream)
示例#14
0
 def process_timeseries(self):
     """Regularize, aggregate and trim the source time series.

     Records the last source timestamp in ``self.source_end_date``. If
     regularization raises ``RegularizeError`` the error is logged to the
     "enhydris.autoprocess" logger and an empty ``HTimeseries`` is returned.
     """
     source = self.htimeseries
     self.source_end_date = source.data.index[-1]
     try:
         regularized = self._regularize_time_series(source)
     except RegularizeError as error:
         logger = logging.getLogger("enhydris.autoprocess")
         logger.error(str(error))
         return HTimeseries()
     return self._trim_last_record_if_not_complete(
         self._aggregate_time_series(regularized)
     )
示例#15
0
 def _prepare_resulting_htimeseries_object(self):
     """Create ``self.pet`` as an empty series and fill in its metadata.

     The time step is written as "<minutes>,0", where the minutes come from
     ``self.config.step``. The precision is 2 for a one-hour step, else 1.
     """
     self.pet = HTimeseries()
     pet = self.pet
     step_minutes = int(self.config.step.total_seconds() / 60)
     pet.time_step = str(step_minutes) + ",0"
     pet.unit = "mm"
     pet.timezone = self.timezone
     pet.variable = "Potential Evapotranspiration"
     is_hourly = self.config.step == dt.timedelta(hours=1)
     pet.precision = 2 if is_hourly else 1
     pet.location = self.location
示例#16
0
 def _upload_all_new_data(self):
     """Post the data newer than each time series' end date.

     Time series are handled in ascending order of their end date, and
     nothing is posted for a series that has no recent data.
     """
     station_id = self._meteologger_storage.station_id
     by_end_date = sorted(
         self._ts_end_dates.items(), key=lambda item: item[1]
     )
     for composite_id, end_date in by_end_date:
         recent = self._meteologger_storage.get_recent_data(
             composite_id.timeseries_group_id, end_date
         )
         if not len(recent):
             continue
         self.client.post_tsdata(
             station_id, *composite_id, HTimeseries(recent)
         )
示例#17
0
 def setUp(self):
     """Give the aggregation a one-record source with an empty time step."""
     single_record = pd.DataFrame(
         data={"value": [42], "flags": [""]},
         columns=["value", "flags"],
         index=[dt.datetime(2019, 5, 21, 11, 20)],
     )
     self.aggregation._htimeseries = HTimeseries(single_record)
     self.aggregation._htimeseries.time_step = ""
示例#18
0
 def test_execute(self):
     """The rate-of-change check turns the source into the expected frame."""
     self.roc_check = mommy.make(RateOfChangeCheck)
     threshold_attrs = {
         "rate_of_change_check": self.roc_check,
         "delta_t": "10min",
         "allowed_diff": 7.0,
     }
     mommy.make(RateOfChangeThreshold, **threshold_attrs)
     source = HTimeseries(self.source_timeseries)
     self.roc_check.checks._htimeseries = source
     actual = self.roc_check.checks.process_timeseries()
     pd.testing.assert_frame_equal(actual, self.expected_result)
示例#19
0
 def get_data(self, start_date=None, end_date=None):
     """Return the stored data between *start_date* and *end_date*.

     The full data set is taken from the cache (and loaded with
     ``_get_all_data_as_pd`` on a miss). Each bound that is given is
     converted to the time series' time zone and made naive before it is
     used to slice the data. The slice is wrapped in an ``HTimeseries``
     whose extra properties are filled in before it is returned.
     """

     def to_naive_local(moment):
         # A missing bound is passed through untouched (open-ended slice).
         if not moment:
             return moment
         localized = moment.astimezone(self.time_zone.as_tzinfo)
         return localized.replace(tzinfo=None)

     all_data = cache.get_or_set(
         f"timeseries_data_{self.id}", self._get_all_data_as_pd
     )
     lower = to_naive_local(start_date)
     upper = to_naive_local(end_date)
     result = HTimeseries(all_data.loc[lower:upper])
     self._set_extra_timeseries_properties(result)
     return result
示例#20
0
    def test_file_is_not_recreated(self):
        """A second ``get_cached`` call must not rewrite an existing file."""

        def fetch_cached():
            series = hspatial.PointTimeseries(self.point, prefix=self.prefix)
            series.get_cached(self.dest)

        fetch_cached()

        # Make the existing file read-only
        read_only = S_IREAD | S_IRGRP | S_IROTH
        os.chmod(self.dest, read_only)

        # A second call shouldn't try to write, so it shouldn't raise
        fetch_cached()
        with open(self.dest, "r", newline="\n") as cached_file:
            self._check_against_expected(HTimeseries(cached_file))
示例#21
0
 def test_execute(self):
     """The range check turns the source series into the expected frame."""
     bounds = {
         "lower_bound": 2,
         "upper_bound": 5,
         "soft_lower_bound": 3,
         "soft_upper_bound": 4,
     }
     self.range_check = mommy.make(RangeCheck, **bounds)
     source = HTimeseries(self.source_timeseries)
     self.range_check.checks._htimeseries = source
     actual = self.range_check.checks.process_timeseries()
     pd.testing.assert_frame_equal(actual, self.expected_result)
示例#22
0
    def test_execute(self):
        """Run the application twice and check both output files each time.

        The first run must create ``file1`` and ``file2`` with the "top"
        part of each time series. After the "bottom" parts are posted
        through the API client, the second run must leave the complete
        series in the files, with unchanged comments.
        """
        application = cli.App(self.config_file)

        # Use the same full paths for the existence checks and for reading,
        # so that the test does not depend on the current working directory.
        path1 = os.path.join(self.tempdir, "file1")
        path2 = os.path.join(self.tempdir, "file2")

        def read_and_check(path, expected_text):
            # Read the file, check its time step and its content with the
            # carriage returns stripped, and return the parsed time series.
            with open(path, newline="\n") as f:
                ts = HTimeseries(f)
            self.assertEqual(ts.time_step, "D")
            written = StringIO()
            ts.write(written)
            self.assertEqual(
                written.getvalue().replace("\r", ""), expected_text
            )
            return ts

        # Check that the two files don't exist yet
        self.assertFalse(os.path.exists(path1))
        self.assertFalse(os.path.exists(path2))

        # Execute the application
        application.run()

        # Check that it has created two files
        self.assertTrue(os.path.exists(path1))
        self.assertTrue(os.path.exists(path2))

        # Check that the files are what they should be
        ts1_before = read_and_check(path1, self.timeseries1_top)
        ts2_before = read_and_check(path2, self.timeseries2_top)

        # Append a record to the database for each timeseries
        self.api_client.post_tsdata(
            self.station1_id,
            self.timeseries1_id,
            HTimeseries(StringIO(self.timeseries1_bottom)),
        )
        self.api_client.post_tsdata(
            self.station2_id,
            self.timeseries2_id,
            HTimeseries(StringIO(self.timeseries2_bottom)),
        )

        # Execute the application again
        application.run()

        # Check that the files are what they should be
        ts1_after = read_and_check(path1, self.test_timeseries1)
        ts2_after = read_and_check(path2, self.test_timeseries2)

        # Check that the time series comments are the same before and after
        self.assertEqual(ts1_before.comment, ts1_after.comment)
        self.assertEqual(ts2_before.comment, ts2_after.comment)
示例#23
0
 def test_execute(self):
     """Curve interpolation turns the source series into the expected frame."""
     station = mommy.make(Station)
     interpolation_attrs = {
         "timeseries_group__gentity": station,
         "target_timeseries_group__gentity": station,
     }
     self.curve_interpolation = mommy.make(
         CurveInterpolation, **interpolation_attrs
     )
     self._setup_period1()
     self._setup_period2()
     source = HTimeseries(self.source_timeseries)
     self.curve_interpolation._htimeseries = source
     actual = self.curve_interpolation.process_timeseries()
     pd.testing.assert_frame_equal(actual, self.expected_result)
示例#24
0
 def _execute(self, max_missing):
     """Aggregate the source series with *max_missing*; return the data.

     An hourly "sum" aggregation is created, and its source is set to
     ``self.source_timeseries`` with a "10min" time step.
     """
     station = mommy.make(Station)
     aggregation_attrs = {
         "timeseries_group__gentity": station,
         "timeseries_group__variable__descr": "Hello",
         "target_time_step": "H",
         "method": "sum",
         "max_missing": max_missing,
         "resulting_timestamp_offset": "1min",
     }
     self.aggregation = mommy.make(Aggregation, **aggregation_attrs)
     self.aggregation._htimeseries = HTimeseries(self.source_timeseries)
     self.aggregation._htimeseries.time_step = "10min"
     processed = self.aggregation.process_timeseries()
     return processed.data
示例#25
0
 def get(self):
     """Build a time series with one record per file in ``self.filenames``.

     For each file, the timestamp is parsed from its "TIMESTAMP" metadata
     item and the value is extracted at ``self.point``; the flags are left
     empty. The result is sorted by timestamp.
     """
     series = HTimeseries()
     for raster_path in self.filenames:
         dataset = gdal.Open(raster_path)
         try:
             raw_timestamp = dataset.GetMetadata()["TIMESTAMP"]
             timestamp = iso8601.parse_date(
                 raw_timestamp, default_timezone=None
             )
             point_value = extract_point_from_raster(self.point, dataset)
             series.data.loc[timestamp, "value"] = point_value
             series.data.loc[timestamp, "flags"] = ""
         finally:
             # Drop the reference to the dataset whether or not we succeeded
             dataset = None
     series.data = series.data.sort_index()
     return series
示例#26
0
 def _time_step(self):
     """Return the time step shared by all files in ``self.config.files``.

     Raises ``click.ClickException`` as soon as a file's time step differs
     from the one seen before it. Returns None if there are no files.
     """
     common_step = None
     for path in self.config.files:
         with open(path) as stream:
             series = HTimeseries(stream, start_date="0001-01-01 00:00")
         step = series.time_step
         differs = common_step and step != common_step
         if differs:
             raise click.ClickException(
                 "Not all time series have the same step"
             )
         common_step = step
     return common_step
示例#27
0
 def test_execute(self):
     """The range check turns the source series into the expected frame."""
     station = mommy.make(Station)
     check_attrs = {
         "lower_bound": 2,
         "upper_bound": 5,
         "soft_lower_bound": 3,
         "soft_upper_bound": 4,
         "station": station,
         "source_timeseries__gentity": station,
         "target_timeseries__gentity": station,
     }
     self.range_check = mommy.make(RangeCheck, **check_attrs)
     self.range_check.htimeseries = HTimeseries(self.source_timeseries)
     actual = self.range_check.process_timeseries()
     pd.testing.assert_frame_equal(actual, self.expected_result)
示例#28
0
 def create_timeseries(self):
     """Create a two-record ``htimeseries``, a station and a time series."""
     timestamps = [
         datetime(2017, 11, 23, 17, 23),
         datetime(2018, 11, 25, 1, 0),
     ]
     self.htimeseries = HTimeseries()
     self.htimeseries.data = pd.DataFrame(
         index=timestamps,
         data={"value": [1.0, 2.0], "flags": ["", ""]},
         columns=["value", "flags"],
     )
     self.station = mommy.make(models.Station)
     timeseries_attrs = {
         "id": 42,
         "gentity": self.station,
         "time_zone__utc_offset": 120,
         "precision": 2,
     }
     self.timeseries = mommy.make(models.Timeseries, **timeseries_attrs)
示例#29
0
def h_integrate(mask, stations_layer, date, output_filename_prefix, date_fmt,
                funct, kwargs):
    """Integrate the station values at *date* into a GeoTIFF file.

    The output file is named ``<output_filename_prefix>-<date>.tif``, where
    the date is formatted with *date_fmt* and its spaces and colons are
    replaced with hyphens. Nothing is done if ``_needs_calculation`` says
    the file needs no calculation, or if no station has a value at *date*.

    A real-valued "value" field is created on *stations_layer*, and for each
    station it is set to the value found at *date* (with the time zone
    stripped) in the time series file named by the station's "filename"
    field, or to NaN when that timestamp is missing.

    The output has the raster size, geotransform and projection of *mask*,
    a single 32-bit float band, and "TIMESTAMP" and "INPUT_FILES" metadata
    items. *funct* and *kwargs* are passed on to ``integrate``.
    """
    # Format the date only once. Feeding the already formatted text to
    # strftime again would misread any "%" it contains as a directive.
    date_for_filename = date.strftime(date_fmt).replace(" ", "-").replace(
        ":", "-")
    output_filename = "{}-{}.tif".format(output_filename_prefix,
                                         date_for_filename)
    if not _needs_calculation(output_filename, date, stations_layer):
        return

    # Read the time series values and add the 'value' attribute to
    # stations_layer
    stations_layer.CreateField(ogr.FieldDefn("value", ogr.OFTReal))
    input_files = []
    stations_layer.ResetReading()
    for station in stations_layer:
        filename = station.GetField("filename")
        with open(filename, newline="\n") as f:
            t = HTimeseries(f)
        try:
            value = t.data.loc[date.replace(tzinfo=None), "value"]
        except KeyError:
            value = np.nan
        station.SetField("value", value)
        # Only files that actually contribute a value are recorded
        if not isnan(value):
            input_files.append(filename)
        stations_layer.SetFeature(station)
    if not input_files:
        return

    # Create destination data source
    output = gdal.GetDriverByName("GTiff").Create(output_filename,
                                                  mask.RasterXSize,
                                                  mask.RasterYSize, 1,
                                                  gdal.GDT_Float32)
    output.SetMetadataItem("TIMESTAMP", date.strftime(date_fmt))
    output.SetMetadataItem("INPUT_FILES", "\n".join(input_files))

    try:
        # Set geotransform and projection in the output data source
        output.SetGeoTransform(mask.GetGeoTransform())
        output.SetProjection(mask.GetProjection())

        # Do the integration
        integrate(mask, stations_layer, output.GetRasterBand(1), funct, kwargs)
    finally:
        # Close the dataset
        output = None
示例#30
0
 def _get_all_data_as_pd(self):
     """Return all records of this time series as parsed by ``HTimeseries``.

     The database builds one text with a "timestamp,value,flags" line per
     record, ordered by timestamp, with the timestamps expressed at the
     offset given by ``_get_tzoffsetstring_for_pg``. That text is parsed
     with ``HTimeseries`` and its ``data`` attribute is returned.
     """
     tz_offset = self._get_tzoffsetstring_for_pg()
     query_params = [tz_offset, self.id]
     with connection.cursor() as cursor:
         cursor.execute(
             """
             SELECT STRING_AGG(
                 TO_CHAR(timestamp at time zone %s, 'YYYY-MM-DD HH24:MI')
                     || ',' || value || ',' || flags,
                 E'\n'
                 ORDER BY timestamp
             ) || E'\n'
             FROM enhydris_timeseriesrecord
             WHERE timeseries_id=%s;
             """,
             query_params,
         )
         csv_text = cursor.fetchone()[0]
         return HTimeseries(StringIO(csv_text)).data