Exemplo n.º 1
0
 def setUp(self):
     """Build the reference 10-minute HTimeseries the tests compare against."""
     frame = pd.read_csv(
         StringIO(tenmin_test_timeseries),
         parse_dates=[0],
         usecols=["date", "value", "flags"],
         index_col=0,
         header=None,
         names=("date", "value", "flags"),
         dtype={"value": np.float64, "flags": str},
     ).asfreq("10T")
     ts = HTimeseries(data=frame)
     ts.unit = "°C"
     ts.title = "A test 10-min time series"
     ts.precision = 1
     ts.time_step = "10min"
     ts.timezone = "EET (UTC+0200)"
     ts.variable = "temperature"
     ts.comment = (
         "This timeseries is extremely important\n"
         "because the comment that describes it\n"
         "spans five lines.\n\n"
         "These five lines form two paragraphs."
     )
     ts.location = {
         "abscissa": 24.6789,
         "ordinate": 38.12345,
         "srid": 4326,
         "altitude": 219.22,
         "asrid": None,
     }
     self.reference_ts = ts
Exemplo n.º 2
0
 def test_write(self):
     """HTimeseries.write() must emit plain CSV with CRLF line endings,
     writing NaN values as empty fields."""
     records = np.array(
         [
             [parse_date("2005-08-23 18:53"), 93, ""],
             [parse_date("2005-08-24 19:52"), 108.7, ""],
             [parse_date("2005-08-25 23:59"), 28.3, "HEARTS SPADES"],
             [parse_date("2005-08-26 00:02"), float("NaN"), ""],
             [parse_date("2005-08-27 00:02"), float("NaN"), "DIAMONDS"],
         ]
     )
     frame = pd.DataFrame(
         records[:, [1, 2]], index=records[:, 0], columns=("value", "flags")
     )
     outstream = StringIO()
     HTimeseries(data=frame).write(outstream)
     expected = textwrap.dedent("""\
         2005-08-23 18:53,93,\r
         2005-08-24 19:52,108.7,\r
         2005-08-25 23:59,28.3,HEARTS SPADES\r
         2005-08-26 00:02,,\r
         2005-08-27 00:02,,DIAMONDS\r
         """)
     self.assertEqual(outstream.getvalue(), expected)
Exemplo n.º 3
0
 def _read_timeseries_from_cache_file(self):
     """Return the time series stored in the cache file, or an empty one
     if the file is missing or unparsable."""
     try:
         with open(self.filename, newline="\n") as cache_file:
             return HTimeseries(cache_file)
     except (FileNotFoundError, ValueError):
         # A missing or corrupted cache just means "start from scratch".
         return HTimeseries()
Exemplo n.º 4
0
 def _prepare_resulting_htimeseries_object(self):
     """Create the empty HTimeseries that will hold the PET results."""
     pet = HTimeseries()
     pet.time_step = self.config.time_step
     pet.unit = "mm"
     pet.timezone = self.timezone
     pet.variable = "Potential Evapotranspiration"
     # Hourly output gets two decimals; coarser steps get one.
     if self.config.time_step == "H":
         pet.precision = 2
     else:
         pet.precision = 1
     pet.location = self.location
     self.pet = pet
Exemplo n.º 5
0
def _get_hts_object(timeseries_id, start_date):
    """Return the "top" fixture series for timeseries_id, or the "bottom"
    one when start_date continues exactly one minute past the top's end.

    A start_date of None or 0001-01-01 00:01 means "from the beginning".
    """
    top = HTimeseries(
        StringIO(test_timeseries["{}_top".format(timeseries_id)]))
    if start_date is None or start_date == dt.datetime(1, 1, 1, 0, 1):
        return top
    # The only other supported request starts right after the top series.
    continuation_start = top.data.index[-1] + dt.timedelta(minutes=1)
    assert start_date == continuation_start
    return HTimeseries(
        StringIO(test_timeseries["{}_bottom".format(timeseries_id)]))
Exemplo n.º 6
0
 def _prepare_resulting_htimeseries_object(self):
     """Create the empty HTimeseries that will hold the PET results."""
     pet = HTimeseries()
     step_minutes = int(self.config.step.total_seconds() / 60)
     pet.time_step = str(step_minutes) + ",0"
     pet.unit = "mm"
     pet.timezone = self.timezone
     pet.variable = "Potential Evapotranspiration"
     # Hourly output gets two decimals; coarser steps get one.
     if self.config.step == dt.timedelta(hours=1):
         pet.precision = 2
     else:
         pet.precision = 1
     pet.location = self.location
     self.pet = pet
Exemplo n.º 7
0
 def setUp(self):
     """Fetch the timeseries data endpoint with get_data patched to return
     a fixed two-record HTimeseries."""
     fixture = HTimeseries()
     fixture.data = pd.DataFrame(
         data={"value": [1.0, 2.0], "flags": ["", ""]},
         columns=["value", "flags"],
         index=[datetime(2017, 11, 23, 17, 23), datetime(2018, 11, 25, 1, 0)],
     )
     station = mommy.make(models.Station)
     timeseries = mommy.make(
         models.Timeseries, gentity=station, time_zone__utc_offset=120
     )
     url = "/api/stations/{}/timeseries/{}/data/".format(station.id, timeseries.id)
     with patch("enhydris.models.Timeseries.get_data", return_value=fixture):
         self.response = self.client.get(url)
Exemplo n.º 8
0
 def get(self):
     """Build a time series by sampling self.point in each raster file.

     Each raster's TIMESTAMP metadata item provides the record's timestamp;
     the record's value is the raster value at the point.
     """
     result = HTimeseries()
     for filename in self.filenames:
         dataset = gdal.Open(filename)
         try:
             stamp = iso8601.parse_date(
                 dataset.GetMetadata()["TIMESTAMP"], default_timezone=None
             )
             result.data.loc[stamp, "value"] = extract_point_from_raster(
                 self.point, dataset
             )
             result.data.loc[stamp, "flags"] = ""
         finally:
             dataset = None  # Dereferencing closes the GDAL dataset
     result.data = result.data.sort_index()
     return result
Exemplo n.º 9
0
    def test_daily(self):
        """End-to-end test: run the CLI app on daily input files and verify
        the evaporation output file it creates."""
        self.setup_daily_input_files()
        self.setup_config_file("D")

        # Verify the output file doesn't exist yet
        result_filename = os.path.join(self.tempdir, "evaporation.hts")
        assert not os.path.exists(result_filename)

        # Execute
        cli.App(self.config_file).run()

        # Check that it has created a file and that the file is correct
        with open(result_filename) as f:
            t = HTimeseries(f)
        expected_result = pd.DataFrame(
            data={"value": [3.9], "flags": [""]},
            columns=["value", "flags"],
            index=[dt.datetime(2014, 7, 6)],
        )
        expected_result.index.name = "date"
        # check_less_precise was deprecated in pandas 1.1 and removed in 2.0.
        # check_less_precise=1 is equivalent to rtol=atol=0.5e-1.
        pd.testing.assert_frame_equal(
            t.data, expected_result, rtol=0.5e-1, atol=0.5e-1
        )
Exemplo n.º 10
0
 def _get_input_timeseries_for_var(self, var):
     """Load <base_dir>/<var_prefix>.hts into self.input_timeseries[var].

     Does nothing if the file does not exist.
     """
     prefix = getattr(self.config, var + "_prefix")
     filename = os.path.join(self.config.base_dir, prefix + ".hts")
     if os.path.exists(filename):
         with open(filename, "r") as f:
             self.input_timeseries[var] = HTimeseries(f)
Exemplo n.º 11
0
 def setUpTestData(cls):
     """Create a test timeseries and bulk-insert two records into it."""
     cls._create_test_timeseries()
     # NOTE(review): the first CSV line carries an extra trailing comma
     # (four fields) - confirm this is deliberate test input.
     fixture = HTimeseries(
         StringIO("2020-09-08 20:00,15.7,,\n2020-09-08 21:00,,\n")
     )
     models.TimeseriesRecord.bulk_insert(cls.timeseries, fixture)
     cls.timeseries_records = models.TimeseriesRecord.objects.all()
Exemplo n.º 12
0
 def _get_htimeseries_from_data(self, data):
     """Coerce data (HTimeseries, DataFrame, or file-like) to HTimeseries."""
     if isinstance(data, HTimeseries):
         return data
     if isinstance(data, pd.DataFrame):
         return HTimeseries(data)
     # Anything else is assumed to be a readable stream of file data.
     return HTimeseries.read(data)
Exemplo n.º 13
0
 def _read_timeseries_from_stream(self, stream):
     """Parse stream as an HTimeseries, turning bad UTF-8 into a form error."""
     try:
         return HTimeseries(stream)
     except UnicodeDecodeError as e:
         message = _("The file does not seem to be a valid UTF-8 file: " +
                     str(e))
         raise forms.ValidationError(message)
Exemplo n.º 14
0
 def create_timeseries(self):
     """Create a full fixture: a station, a timeseries group, a raw
     timeseries, and two in-memory data records stored in the timeseries."""
     self.htimeseries = HTimeseries()
     self.htimeseries.data = pd.DataFrame(
         index=[dt.datetime(2017, 11, 23, 17, 23), dt.datetime(2018, 11, 25, 1, 0)],
         data={"value": [1.0, 2.0], "flags": ["", ""]},
         columns=["value", "flags"],
     )
     self.station = mommy.make(
         models.Station,
         name="Komboti",
         geom=Point(x=21.00000, y=39.00000, srid=4326),
         original_srid=4326,
     )
     self.time_zone = mommy.make(models.TimeZone, code="EET", utc_offset=120)
     self.variable = models.Variable()
     # Variable.descr is a translatable field, so set it under "en".
     with switch_language(self.variable, "en"):
         self.variable.descr = "Beauty"
         self.variable.save()
     self.timeseries_group = mommy.make(
         models.TimeseriesGroup,
         gentity=self.station,
         time_zone=self.time_zone,
         precision=2,
         variable=self.variable,
     )
     self.timeseries = mommy.make(
         models.Timeseries,
         type=models.Timeseries.RAW,
         timeseries_group=self.timeseries_group,
     )
     # Persist the in-memory records through the model API.
     self.timeseries.set_data(self.htimeseries.data)
Exemplo n.º 15
0
 def _get_timeseries_if_file_is_up_to_date_else_none(self, dest):
     """Return the time series stored in dest, or None if the cache is
     stale (some source file's date is missing from it)."""
     with open(dest, "r", newline="") as f:
         cached = HTimeseries(f)
     dates_present = cached.data.index
     is_current = all(
         self.filename_format.get_date(fn) in dates_present
         for fn in self.filenames
     )
     return cached if is_current else None
Exemplo n.º 16
0
 def _get_timeseries_without_moving_file_position(self, datastream):
     """Read an HTimeseries from a binary stream, leaving the stream open
     and restoring its original position afterwards."""
     original_position = datastream.tell()
     # Wrap the binary stream so HTimeseries can read it as UTF-8 text.
     wrapped_datastream = TextIOWrapper(datastream, encoding="utf-8", newline="\n")
     result = HTimeseries.read(wrapped_datastream)
     wrapped_datastream.detach()  # If we don't do this the datastream will be closed
     datastream.seek(original_position)
     return result
Exemplo n.º 17
0
 def test_read_csv_with_duplicates_raises_error(self):
     """Duplicate timestamps in the input CSV must raise ValueError."""
     stream = StringIO(self.csv_with_duplicates)
     stream.seek(0)
     expected_message = (
         "Can't read time series: the following timestamps appear more than once: "
         "2020-02-23 12:00:00, 2020-02-23 13:00:00")
     with self.assertRaisesRegex(ValueError, expected_message):
         HTimeseries(stream)
Exemplo n.º 18
0
 def get_data(self, start_date=None, end_date=None):
     """Return this timeseries' data, optionally clipped to the given
     range, with extra metadata attributes attached."""
     if not self.datafile:
         result = HTimeseries()
     else:
         with open(self.datafile.path, "r", newline="\n") as f:
             result = HTimeseries.read(f, start_date=start_date, end_date=end_date)
     self._set_extra_timeseries_properties(result)
     return result
Exemplo n.º 19
0
 def setUp(self):
     """Parse the ten-minute fixture restricted to a 25-minute window."""
     stream = StringIO(tenmin_test_timeseries)
     stream.seek(0)
     window_start = dt.datetime(2008, 2, 7, 11, 30)
     window_end = dt.datetime(2008, 2, 7, 11, 55)
     self.ts = HTimeseries(stream, start_date=window_start, end_date=window_end)
Exemplo n.º 20
0
 def process_timeseries(self):
     """Regularize, aggregate and trim the source time series.

     Returns an empty HTimeseries (after logging the error) if
     regularization fails.
     """
     self.source_end_date = self.htimeseries.data.index[-1]
     try:
         regularized = self._regularize_time_series(self.htimeseries)
     except RegularizeError as e:
         logging.getLogger("enhydris.autoprocess").error(str(e))
         return HTimeseries()
     return self._trim_last_record_if_not_complete(
         self._aggregate_time_series(regularized)
     )
Exemplo n.º 21
0
 def _upload_all_new_data(self):
     """Post each timeseries' data newer than its end date, oldest first."""
     station_id = self._meteologger_storage.station_id
     by_end_date = sorted(self._ts_end_dates.items(), key=lambda item: item[1])
     for cts_id, ts_end_date in by_end_date:
         new_data = self._meteologger_storage.get_recent_data(
             cts_id.timeseries_group_id, ts_end_date
         )
         if not len(new_data):
             continue
         self.client.post_tsdata(station_id, *cts_id, HTimeseries(new_data))
Exemplo n.º 22
0
 def setUp(self):
     """Give the aggregation a one-record source series with no time step."""
     source_frame = pd.DataFrame(
         data={"value": [42], "flags": [""]},
         columns=["value", "flags"],
         index=[dt.datetime(2019, 5, 21, 11, 20)],
     )
     self.aggregation._htimeseries = HTimeseries(source_frame)
     self.aggregation._htimeseries.time_step = ""
Exemplo n.º 23
0
 def test_execute(self):
     """A range check with hard and soft bounds yields the expected frame."""
     self.range_check = mommy.make(
         RangeCheck,
         lower_bound=2,
         soft_lower_bound=3,
         soft_upper_bound=4,
         upper_bound=5,
     )
     checks = self.range_check.checks
     checks._htimeseries = HTimeseries(self.source_timeseries)
     pd.testing.assert_frame_equal(
         checks.process_timeseries(), self.expected_result
     )
Exemplo n.º 24
0
 def test_execute(self):
     """A rate-of-change check with a 10-min threshold yields the expected
     frame."""
     self.roc_check = mommy.make(RateOfChangeCheck)
     mommy.make(
         RateOfChangeThreshold,
         rate_of_change_check=self.roc_check,
         delta_t="10min",
         allowed_diff=7.0,
     )
     checks = self.roc_check.checks
     checks._htimeseries = HTimeseries(self.source_timeseries)
     pd.testing.assert_frame_equal(
         checks.process_timeseries(), self.expected_result
     )
Exemplo n.º 25
0
 def get_data(self, start_date=None, end_date=None):
     """Return the (cached) data clipped to [start_date, end_date], with
     extra metadata attributes attached.

     Aware datetimes are converted to the timeseries' time zone and made
     naive before slicing, since the stored index is naive local time.
     """
     data = cache.get_or_set(f"timeseries_data_{self.id}", self._get_all_data_as_pd)

     def _to_naive_local(adatetime):
         # None propagates so that open-ended slices keep working.
         if adatetime is None:
             return None
         localized = adatetime.astimezone(self.time_zone.as_tzinfo)
         return localized.replace(tzinfo=None)

     clipped = data.loc[_to_naive_local(start_date):_to_naive_local(end_date)]
     result = HTimeseries(clipped)
     self._set_extra_timeseries_properties(result)
     return result
Exemplo n.º 26
0
    def test_file_is_not_recreated(self):
        """get_cached() must not rewrite a cache file that is up to date."""
        hspatial.PointTimeseries(self.point,
                                 prefix=self.prefix).get_cached(self.dest)

        # Make existing file read-only
        os.chmod(self.dest, S_IREAD | S_IRGRP | S_IROTH)

        # A second call must see that the file is current and skip writing,
        # so the read-only file must not cause an exception.
        hspatial.PointTimeseries(self.point,
                                 prefix=self.prefix).get_cached(self.dest)

        # The (unchanged) file must still hold the expected time series.
        with open(self.dest, "r", newline="\n") as f:
            self._check_against_expected(HTimeseries(f))
Exemplo n.º 27
0
 def test_execute(self):
     """Curve interpolation over two periods yields the expected frame."""
     station = mommy.make(Station)
     self.curve_interpolation = mommy.make(
         CurveInterpolation,
         timeseries_group__gentity=station,
         target_timeseries_group__gentity=station,
     )
     self._setup_period1()
     self._setup_period2()
     ci = self.curve_interpolation
     ci._htimeseries = HTimeseries(self.source_timeseries)
     pd.testing.assert_frame_equal(ci.process_timeseries(), self.expected_result)
Exemplo n.º 28
0
 def _execute(self, max_missing):
     """Run an hourly "sum" aggregation with the given max_missing and
     return the resulting data frame."""
     station = mommy.make(Station)
     self.aggregation = mommy.make(
         Aggregation,
         timeseries_group__gentity=station,
         timeseries_group__variable__descr="Hello",
         target_time_step="H",
         method="sum",
         max_missing=max_missing,
         resulting_timestamp_offset="1min",
     )
     source = HTimeseries(self.source_timeseries)
     source.time_step = "10min"
     self.aggregation._htimeseries = source
     return self.aggregation.process_timeseries().data
Exemplo n.º 29
0
 def test_execute(self):
     """A range check attached to a station yields the expected frame."""
     station = mommy.make(Station)
     self.range_check = mommy.make(
         RangeCheck,
         station=station,
         source_timeseries__gentity=station,
         target_timeseries__gentity=station,
         lower_bound=2,
         upper_bound=5,
         soft_lower_bound=3,
         soft_upper_bound=4,
     )
     self.range_check.htimeseries = HTimeseries(self.source_timeseries)
     pd.testing.assert_frame_equal(
         self.range_check.process_timeseries(), self.expected_result
     )
Exemplo n.º 30
0
 def create_timeseries(self):
     """Create a station with one timeseries plus an in-memory HTimeseries
     holding two records (the data are not stored in the model)."""
     hts = HTimeseries()
     hts.data = pd.DataFrame(
         data={"value": [1.0, 2.0], "flags": ["", ""]},
         columns=["value", "flags"],
         index=[datetime(2017, 11, 23, 17, 23), datetime(2018, 11, 25, 1, 0)],
     )
     self.htimeseries = hts
     self.station = mommy.make(models.Station)
     self.timeseries = mommy.make(
         models.Timeseries,
         id=42,
         gentity=self.station,
         precision=2,
         time_zone__utc_offset=120,
     )
Exemplo n.º 31
0
 def _time_step(self):
     """
     Return time step of all time series. If time step is not the same
     for all time series, raises exception.

     NOTE(review): a file whose time step is empty/None is skipped by the
     comparison (because of ``if time_step and ...``), so a mix of empty
     and non-empty steps can pass undetected - confirm this is intended.
     """
     time_step = None
     for filename in self.config.files:
         with open(filename) as f:
             current_step = HTimeseries(
                 f, start_date="0001-01-01 00:00").time_step
         if time_step and current_step != time_step:
             raise click.ClickException(
                 "Not all time series have the same step")
         time_step = current_step
     return time_step
Exemplo n.º 32
0
def h_integrate(mask, stations_layer, date, output_filename_prefix, date_fmt,
                funct, kwargs):
    """Spatially integrate the station time series values for ``date``.

    Reads each station's time series value at ``date``, stores it in a new
    "value" attribute of ``stations_layer``, and writes the result of
    ``funct`` as a GeoTIFF whose name embeds the formatted date.  Does
    nothing if the output file is already up to date (see
    _needs_calculation) or if no station has data for ``date``.
    """
    # Make the formatted date safe for use in a filename.
    date_fmt_for_filename = date.strftime(date_fmt).replace(" ", "-").replace(
        ":", "-")
    # Bug fix: the already-formatted date used to be passed through
    # strftime() a second time, which is a no-op only while the result
    # contains no "%"; otherwise it mangled the filename.  Use it directly.
    output_filename = "{}-{}.tif".format(output_filename_prefix,
                                         date_fmt_for_filename)
    if not _needs_calculation(output_filename, date, stations_layer):
        return

    # Read the time series values and add the 'value' attribute to
    # stations_layer
    stations_layer.CreateField(ogr.FieldDefn("value", ogr.OFTReal))
    input_files = []
    stations_layer.ResetReading()
    for station in stations_layer:
        filename = station.GetField("filename")
        with open(filename, newline="\n") as f:
            t = HTimeseries(f)
        try:
            value = t.data.loc[date.replace(tzinfo=None), "value"]
        except KeyError:
            # This station has no record at the requested timestamp.
            value = np.nan
        station.SetField("value", value)
        if not isnan(value):
            input_files.append(filename)
        stations_layer.SetFeature(station)
    if not input_files:
        # No station has data for this date; nothing to integrate.
        return

    # Create destination data source
    output = gdal.GetDriverByName("GTiff").Create(output_filename,
                                                  mask.RasterXSize,
                                                  mask.RasterYSize, 1,
                                                  gdal.GDT_Float32)
    # Record provenance so _needs_calculation can detect staleness later.
    output.SetMetadataItem("TIMESTAMP", date.strftime(date_fmt))
    output.SetMetadataItem("INPUT_FILES", "\n".join(input_files))

    try:
        # Set geotransform and projection in the output data source
        output.SetGeoTransform(mask.GetGeoTransform())
        output.SetProjection(mask.GetProjection())

        # Do the integration
        integrate(mask, stations_layer, output.GetRasterBand(1), funct, kwargs)
    finally:
        # Close the dataset
        output = None
Exemplo n.º 33
0
 def _get_all_data_as_pd(self):
     """Fetch every record of this timeseries from the database as a
     pandas DataFrame.

     Builds a CSV string ("date,value,flags" lines) entirely inside
     PostgreSQL with STRING_AGG and parses it through HTimeseries, with
     timestamps converted to the timeseries' time zone offset.
     """
     tzoffsetstring = self._get_tzoffsetstring_for_pg()
     with connection.cursor() as cursor:
         cursor.execute(
             """
             SELECT STRING_AGG(
                 TO_CHAR(timestamp at time zone %s, 'YYYY-MM-DD HH24:MI')
                     || ',' || value || ',' || flags,
                 E'\n'
                 ORDER BY timestamp
             ) || E'\n'
             FROM enhydris_timeseriesrecord
             WHERE timeseries_id=%s;
             """,
             [tzoffsetstring, self.id],
         )
         return HTimeseries(StringIO(cursor.fetchone()[0])).data
Exemplo n.º 34
0
def _needs_calculation(output_filename, date, stations_layer):
    """
    Used by h_integrate to check whether the output file needs to be calculated
    or not. It does not need to be calculated if it already exists and has been
    calculated from all available data.
    """
    # Return immediately if output file does not exist
    if not os.path.exists(output_filename):
        return True

    # Get set of files which were used to calculate the output file
    fp = gdal.Open(output_filename)
    try:
        actual_input_files = fp.GetMetadataItem("INPUT_FILES")
        if actual_input_files is None:
            raise IOError(
                "{} does not contain the metadata item INPUT_FILES".format(
                    output_filename))
    finally:
        fp = None  # Close file
    actual_input_files = set(actual_input_files.split("\n"))

    # Get set of files available for calculating the output file
    # (read each station's filename attribute once).
    stations_layer.ResetReading()
    station_filenames = (
        station.GetField("filename") for station in stations_layer
    )
    available_input_files = {
        filename for filename in station_filenames if os.path.exists(filename)
    }

    # For each file that has not been used yet, check whether it has data
    # for the date; the first one that does settles the verdict.
    for filename in available_input_files - actual_input_files:
        with open(filename, newline="\n") as f:
            t = HTimeseries(f)
        try:
            value = t.data.loc[date.replace(tzinfo=None), "value"]
        except KeyError:
            # No record at this timestamp in this file.
            continue
        if not isnan(value):
            return True

    # We were unable to find data that had not already been used
    return False