def test_get_date_ranges_to_download():
    """A requested span is split around days already present / known missing."""
    result = utils.get_date_ranges_to_download(
        TEST_HDF, PV_SYSTEM, '2018-01-01', '2019-01-10')
    # The expected output resumes on 2019-01-04, i.e. the days at the start
    # of January 2019 are already recorded as missing in TEST_HDF.
    # (NOTE(review): the original comment said 2018-01-02/03 — confirm which
    # dates TEST_HDF actually marks as missing.)
    expected = [
        DateRange(start_date=date(2018, 1, 1), end_date=date(2018, 12, 31)),
        DateRange(start_date=date(2019, 1, 4), end_date=date(2019, 1, 10)),
    ]
    np.testing.assert_array_equal(result, expected)
def _filter_date_range(
        self,
        store_filename: str,
        system_id: int,
        date_ranges: Iterable[DateRange],
        min_data_availability: Optional[float] = 0.5) -> List[DateRange]:
    """Check getstatistic to see if system_id has data for all date ranges.

    Args:
        store_filename: HDF5 filename to cache statistics to / from.
        system_id: PV system ID.
        date_ranges: Iterable of DateRange objects.
        min_data_availability: A float in the range [0, 1].
            1 means only accept PV systems which have no days of missing
            data.  0 means accept all PV systems, no matter if they have
            missing data.  None disables the availability check entirely.

    Returns:
        The subset of `date_ranges` (clipped via intersection) for which
        the system actually has data; an empty list if the system has no
        data at all or its availability is below the threshold.
    """
    # Materialize so we can index [-1] below; the annotation accepts any
    # iterable (a generator would otherwise be truthy-but-unindexable).
    date_ranges = list(date_ranges)
    if not date_ranges:
        # Return a fresh empty list (not the caller's object) to honour
        # the List[DateRange] return annotation for any input type.
        return []
    stats = self._get_statistic_with_cache(
        store_filename,
        system_id,
        date_to=date_ranges[-1].end_date,
        wait_if_rate_limit_exceeded=True).squeeze()
    if (pd.isnull(stats['actual_date_from']) or
            pd.isnull(stats['actual_date_to'])):
        _LOG.info('system_id %d: Stats say there is no data!', system_id)
        return []
    timeseries_date_range = DateRange(
        stats['actual_date_from'], stats['actual_date_to'])
    data_availability = (
        stats['num_outputs'] / (timeseries_date_range.total_days() + 1))
    # min_data_availability is Optional: None means "no availability filter".
    # The original compared unconditionally, which raises TypeError for None.
    if (min_data_availability is not None and
            data_availability < min_data_availability):
        _LOG.info(
            'system_id %d: Data availability too low!  Only %.0f %%.',
            system_id, data_availability * 100)
        return []
    # Clip each requested range to the span the system actually covers,
    # dropping ranges with no overlap at all.
    new_date_ranges = []
    for date_range in date_ranges:
        new_date_range = date_range.intersection(timeseries_date_range)
        if new_date_range:
            new_date_ranges.append(new_date_range)
    return new_date_ranges
def test_merge_date_ranges_to_years():
    """merge_date_ranges_to_years covers the inputs with year-long chunks."""
    jan_2018 = DateRange("2018-01-01", "2018-02-01")
    spans_two_years = DateRange("2017-01-01", "2018-02-01")
    old_span = DateRange("2014-01-01", "2016-02-01")
    lone_old_month = DateRange("2010-01-01", "2010-02-01")

    cases = [
        # Empty input -> empty output.
        ([], []),
        # A single short range is covered by one whole year ending at its end.
        ([jan_2018], [DateRange("2017-02-01", "2018-02-01")]),
        # A range longer than a year needs two year-chunks.
        ([spans_two_years], [
            DateRange("2017-02-01", "2018-02-01"),
            DateRange("2016-02-02", "2017-02-01"),
        ]),
        # Overlap-free ranges are each covered, newest first.
        ([old_span, spans_two_years], [
            DateRange("2017-02-01", "2018-02-01"),
            DateRange("2016-02-02", "2017-02-01"),
            DateRange("2015-02-01", "2016-02-01"),
            DateRange("2014-02-01", "2015-02-01"),
            DateRange("2013-02-01", "2014-02-01"),
        ]),
        # A distant range contributes its own year at the end.
        ([lone_old_month, old_span, spans_two_years], [
            DateRange("2017-02-01", "2018-02-01"),
            DateRange("2016-02-02", "2017-02-01"),
            DateRange("2015-02-01", "2016-02-01"),
            DateRange("2014-02-01", "2015-02-01"),
            DateRange("2013-02-01", "2014-02-01"),
            DateRange("2009-02-01", "2010-02-01"),
        ]),
    ]
    for inputs, expected in cases:
        assert merge_date_ranges_to_years(inputs) == expected
def test_split_into_years():
    """split_into_years returns year-sized pieces, most recent first."""
    # Ranges no longer than a year come back unchanged, as a single item.
    brief = DateRange("2019-01-01", "2019-01-10")
    assert brief.split_into_years() == [brief]

    exactly_a_year = DateRange("2019-01-01", "2020-01-01")
    assert exactly_a_year.split_into_years() == [exactly_a_year]

    # Eighteen months split into a trailing full year plus the remainder.
    eighteen_months = DateRange("2019-01-01", "2020-06-01")
    assert eighteen_months.split_into_years() == [
        DateRange("2019-06-02", "2020-06-01"),
        DateRange("2019-01-01", "2019-06-02"),
    ]
def test_total_days():
    """total_days is the difference in days between the two endpoints."""
    nine_day_span = DateRange("2019-01-01", "2019-01-10")
    assert nine_day_span.total_days() == 9
def test_intersection():
    """intersection clips to the overlap, or returns None when disjoint."""
    # Disjoint ranges have no intersection.
    assert DateRange("2019-01-01", "2019-01-02").intersection(
        DateRange("2020-01-01", "2020-01-02")) is None

    # A range fully inside another is returned unchanged.
    assert DateRange("2019-01-01", "2019-01-10").intersection(
        DateRange("2019-01-01", "2019-01-02")) == DateRange(
            "2019-01-01", "2019-01-02")

    # Partial overlap is clipped to the shared span.
    assert DateRange("2019-01-01", "2019-01-10").intersection(
        DateRange("2019-01-05", "2019-01-20")) == DateRange(
            "2019-01-05", "2019-01-10")

    whole_year = DateRange("2018-01-1", "2019-01-01")
    december = DateRange("2018-12-01", "2019-01-01")
    assert whole_year.intersection(december) == december

    midsummer = DateRange("2018-06-01", "2018-07-01")
    assert whole_year.intersection(midsummer) == midsummer

    # Overlap that starts before the year must get clipped, so the result
    # differs from the original range.
    straddles_start = DateRange("2017-07-01", "2018-02-01")
    assert whole_year.intersection(straddles_start) != straddles_start