def get_observations(time_slots, observed_property, observation_provider_model, feature_of_interest, process, t, lag_window_size, future_window_size):
    """Extend ``time_slots`` with lag/future slots and pass them to ``prepare_data``.

    Returns ``[]`` straight away when ``time_slots`` is empty. Otherwise the
    spacing between consecutive slot starts is measured on three slots
    produced by ``generate_n_intervals`` from the first slot's lower bound,
    and up to ``lag_window_size`` slots are put in front of ``time_slots``
    and up to ``future_window_size`` slots behind it. The result of
    ``prepare_data`` for the extended list is returned.

    A window size that is falsy or not greater than zero adds no slots on
    that side.
    """
    if len(time_slots) == 0:
        return []

    # Distance between the starts of two consecutive generated slots.
    probe = generate_n_intervals(t, time_slots[0].lower, 3)
    step = probe[1].lower - probe[0].lower

    lag_slots = []
    if lag_window_size and lag_window_size > 0:
        head = time_slots[0]
        # Look back far enough for `lag_window_size` steps plus one slot width.
        reach = (step * lag_window_size + (head.upper - head.lower)).total_seconds()
        candidates = generate_intervals(
            timeslots=t,
            from_datetime=head.lower - timedelta(seconds=reach),
            to_datetime=head.lower,
        )
        # Only the slots closest to the first requested slot are kept.
        lag_slots = candidates[-lag_window_size:]

    future_slots = []
    if future_window_size and future_window_size > 0:
        head = time_slots[0]
        last_start = time_slots[-1].lower
        reach = (step * future_window_size + (head.upper - head.lower)).total_seconds()
        candidates = generate_intervals(
            timeslots=t,
            from_datetime=last_start,
            to_datetime=last_start + timedelta(seconds=reach),
        )
        # The first generated slot is dropped, then the trailing
        # `future_window_size` slots are kept.
        future_slots = candidates[1:][-future_window_size:]

    return prepare_data(
        lag_slots + time_slots + future_slots,
        observed_property,
        observation_provider_model,
        feature_of_interest,
        process,
        t,
    )
def test_2_hour_slots_every_hour(self):
    # Slots are two hours wide and a new one starts every hour; the
    # assertion pins which slots a 05:00-06:00 query is expected to return.
    def at(*parts):
        return datetime(*parts).replace(tzinfo=UTC_P0100)

    t = TimeSlots(
        zero=at(2000, 1, 1, 0, 0, 0),
        frequency=relativedelta(hours=1),
        range_from=relativedelta(hours=0),
        range_to=relativedelta(hours=2),
    )
    t.clean()

    result_slots = generate_intervals(
        timeslots=t,
        from_datetime=at(2000, 1, 3, 5, 0, 0),
        to_datetime=at(2000, 1, 3, 6, 0, 0),
    )

    expected_bounds = (
        (at(2000, 1, 3, 4, 0), at(2000, 1, 3, 6, 0)),
        (at(2000, 1, 3, 5, 0), at(2000, 1, 3, 7, 0)),
    )
    expected_slots = [
        DateTimeTZRange(lower=start, upper=end)
        for start, end in expected_bounds
    ]
    self.assertEqual(expected_slots, result_slots)
def test_interval_from_first_day_of_year(self):
    # Yearly slots starting at the zero point and lasting 3 days 3 hours;
    # querying 2000-01-01 .. 2002-01-01 is expected to yield the 2000 and
    # 2001 slots only.
    def at(*parts):
        return datetime(*parts).replace(tzinfo=UTC_P0100)

    t = TimeSlots(
        zero=at(2000, 1, 1, 0, 0, 0),
        frequency=relativedelta(years=1),
        range_from=relativedelta(0),
        range_to=relativedelta(days=3, hours=3),
    )
    t.clean()

    result_slots = generate_intervals(
        timeslots=t,
        from_datetime=at(2000, 1, 1, 0, 0, 0),
        to_datetime=at(2002, 1, 1, 0, 0, 0),
    )

    expected_slots = [
        DateTimeTZRange(lower=at(year, 1, 1, 0, 0), upper=at(year, 1, 4, 3, 0))
        for year in (2000, 2001)
    ]
    self.assertEqual(expected_slots, result_slots)
def test_last_day_of_month(self):
    # Monthly frequency with a range reaching one day back from the month
    # boundary: each expected slot covers the last day of a month.
    def at(*parts):
        return datetime(*parts).replace(tzinfo=UTC_P0100)

    t = TimeSlots(
        zero=at(2000, 1, 1, 0, 0, 0),
        frequency=relativedelta(months=1),
        range_from=relativedelta(days=-1),
        range_to=relativedelta(0),
    )
    t.clean()

    result_slots = generate_intervals(
        timeslots=t,
        from_datetime=at(2000, 2, 3, 0, 0, 0),
        to_datetime=at(2000, 5, 31, 0, 0, 0),
    )

    # (month, last day of that month) for February to April 2000.
    month_ends = ((2, 29), (3, 31), (4, 30))
    expected_slots = [
        DateTimeTZRange(
            lower=at(2000, month, last_day, 0, 0),
            upper=at(2000, month + 1, 1, 0, 0),
        )
        for month, last_day in month_ends
    ]
    self.assertEqual(expected_slots, result_slots)
def get_value_frequency(t, from_datetime):
    """Return the seconds between the starts of the first two slots of ``t``.

    Slots are generated from ``from_datetime`` up to ``from_datetime`` with
    ``t.frequency`` added twice. ``None`` is returned when fewer than two
    slots come back.
    """
    # The frequency is added in two separate steps, exactly as two
    # consecutive additions (not as a doubled frequency).
    window_end = from_datetime + t.frequency
    window_end = window_end + t.frequency

    slots = generate_intervals(
        timeslots=t,
        from_datetime=from_datetime,
        to_datetime=window_end,
    )

    if len(slots) >= 2:
        first, second = slots[0], slots[1]
        return (second.lower - first.lower).total_seconds()
    return None
def test_timeseries_default_values(self):
    # A TimeSlots built with only `zero` set is expected to produce
    # back-to-back one-hour slots for the queried two-hour window.
    def at(*parts):
        return datetime(*parts).replace(tzinfo=UTC_P0100)

    t = TimeSlots(zero=default_zero)
    t.clean()

    result_slots = generate_intervals(
        timeslots=t,
        from_datetime=at(2000, 1, 3, 0, 0, 0),
        to_datetime=at(2000, 1, 3, 2, 0, 0),
    )

    expected_slots = [
        DateTimeTZRange(lower=at(2000, 1, 3, hour, 0), upper=at(2000, 1, 3, hour + 1, 0))
        for hour in (0, 1)
    ]
    self.assertEqual(expected_slots, result_slots)
def test_from_limit(self):
    # With `range_from_limit` set to 01:00, only the 01:00-02:00 slot is
    # expected from a 00:00-02:00 query over hourly slots.
    def at(*parts):
        return datetime(*parts).replace(tzinfo=UTC_P0100)

    t = TimeSlots(
        zero=default_zero,
        frequency=relativedelta(hours=1),
        range_from=relativedelta(hours=0),
        range_to=relativedelta(hours=1),
    )
    t.clean()

    result_slots = generate_intervals(
        timeslots=t,
        from_datetime=at(2000, 1, 3, 0, 0, 0),
        to_datetime=at(2000, 1, 3, 2, 0, 0),
        range_from_limit=at(2000, 1, 3, 1, 0, 0),
    )

    only_slot = DateTimeTZRange(
        lower=at(2000, 1, 3, 1, 0),
        upper=at(2000, 1, 3, 2, 0),
    )
    expected_slots = [only_slot]
    self.assertEqual(expected_slots, result_slots)
def test_3_hour_slots_wednesday_from_8_to_11(self):
    # Weekly frequency with the range shifted 4 days from the zero point:
    # one 08:00-11:00 slot per week is expected in January 2000.
    def at(*parts):
        return datetime(*parts).replace(tzinfo=UTC_P0100)

    t = TimeSlots(
        zero=at(2000, 1, 1, 0, 0, 0),
        frequency=relativedelta(days=7),
        range_from=relativedelta(days=4, hours=8),
        range_to=relativedelta(days=4, hours=11),
    )
    t.clean()

    result_slots = generate_intervals(
        timeslots=t,
        from_datetime=at(2000, 1, 3, 0, 0, 0),
        to_datetime=at(2000, 1, 31, 0, 0, 0),
    )

    expected_slots = [
        DateTimeTZRange(lower=at(2000, 1, day, 8, 0), upper=at(2000, 1, day, 11, 0))
        for day in (5, 12, 19, 26)
    ]
    self.assertEqual(expected_slots, result_slots)
def get_empty_slots(t, pt_range_z):
    """Generate the slots of ``t`` across ``pt_range_z``.

    The lower and upper bounds of ``pt_range_z`` are used both as the
    from/to datetimes and as the from/to range limits passed to
    ``generate_intervals``; its result is returned unchanged.
    """
    start = pt_range_z.lower
    end = pt_range_z.upper
    return generate_intervals(
        timeslots=t,
        from_datetime=start,
        to_datetime=end,
        range_from_limit=start,
        range_to_limit=end,
    )
def test_week_slots(self):
    # Three TimeSlots definitions that differ in zero point and range
    # offsets are all expected to yield the same four Monday-to-Monday
    # weeks of January 2000.
    def at(*parts):
        return datetime(*parts).replace(tzinfo=UTC_P0100)

    # (zero, range_from, range_to) for each definition; all use a 7-day frequency.
    definitions = (
        (at(2000, 1, 3, 0, 0, 0), relativedelta(hours=0), relativedelta(days=7)),
        (at(2000, 1, 1, 0, 0, 0), relativedelta(days=2), relativedelta(days=9)),
        (at(2000, 1, 1, 0, 0, 0), relativedelta(days=-5), relativedelta(days=2)),
    )

    configured = []
    for zero, range_from, range_to in definitions:
        slots = TimeSlots(
            zero=zero,
            frequency=relativedelta(days=7),
            range_from=range_from,
            range_to=range_to,
        )
        slots.clean()
        configured.append(slots)

    i1, i2, i3 = [
        generate_intervals(
            timeslots=slots,
            from_datetime=at(2000, 1, 3, 0, 0, 0),
            to_datetime=at(2000, 1, 31, 0, 0, 0),
        )
        for slots in configured
    ]

    week_starts = (3, 10, 17, 24, 31)
    expected_slots = [
        DateTimeTZRange(lower=at(2000, 1, begin, 0, 0), upper=at(2000, 1, end, 0, 0))
        for begin, end in zip(week_starts, week_starts[1:])
    ]

    self.assertEqual(expected_slots, i1)
    self.assertEqual(i1, i2)
    self.assertEqual(i1, i3)
    self.assertEqual(i2, i3)
def process_feature(
    process_method,
    provider,
    observed_property,
    id_by_provider,
    process_name_id,
    ref_time_slots_id,
    aggregate_updated_since,
    ts_id
):
    """Aggregate one feature's observations into the slots of a TimeSlots definition.

    The function works out which time window needs (re)aggregation,
    generates the slots of the target TimeSlots for that window and calls
    ``aggregate_observations`` once per slot with the source observations
    whose ``phenomenon_time_range`` is contained by the slot.

    Source observations are selected in one of two ways:

    * no reference time slots (``ref_time_slots_id`` is ``None``): observations
      of the property/feature whose ``procedure`` is the process named by
      ``process_name_id`` (``None`` when that process does not exist);
    * reference time slots given: observations of the property/feature with
      ``time_slots`` equal to the reference TimeSlots and ``procedure`` equal
      to the process named by ``process_method``.

    Args:
        process_method: ``name_id`` of the Process used for the aggregated
            observations (looked up without a guard).
        provider: dotted ``"module.path.ModelName"`` string of the
            observation model.
        observed_property: ``name_id`` of the Property to aggregate.
        id_by_provider: ``id_by_provider`` of the feature of interest.
        process_name_id: ``name_id`` of the Process of the source
            observations; a missing Process is tolerated.
        ref_time_slots_id: ``name_id`` of the reference TimeSlots, or ``None``.
        aggregate_updated_since: when truthy, used as the ``updated_at``
            lower bound for picking the window instead of the newest
            ``updated_at`` among already aggregated observations.
        ts_id: ``name_id`` of the target TimeSlots.

    Returns:
        None. The function returns early when there are no source
        observations at all.

    Raises:
        Exception: when the provider cannot be imported, or when the target
            or reference TimeSlots does not exist. Exceptions raised by the
            unguarded Property, feature and ``process_method`` lookups
            propagate unchanged.
    """
    provider_module, provider_model, error_message = import_models(provider)
    if error_message:
        raise Exception("Importing error - %s : %s" % (provider, error_message))

    feature_of_interest_model = provider_module._meta.get_field(
        'feature_of_interest').remote_field.model

    # NOTE(review): both names returned by import_models() are rebound here
    # from the dotted path; kept as in the original - confirm whether the
    # second import is still needed.
    path = provider.rsplit('.', 1)
    provider_module = import_module(path[0])
    provider_model = getattr(provider_module, path[1])

    prop_item = Property.objects.get(name_id=observed_property)
    item = feature_of_interest_model.objects.get(id_by_provider=id_by_provider)
    process_calc = Process.objects.get(name_id=process_method)

    try:
        process = Process.objects.get(name_id=process_name_id)
    except Process.DoesNotExist:
        process = None

    try:
        t = TimeSlots.objects.get(name_id=ts_id)
    except TimeSlots.DoesNotExist:
        raise Exception('Time_slots with desired id not found.')

    ref_ts = None
    if ref_time_slots_id is not None:
        try:
            ref_ts = TimeSlots.objects.get(name_id=ref_time_slots_id)
        except TimeSlots.DoesNotExist:
            raise Exception('REF TS ID - Time_slots with desired id not found.')

    def source_observations(**extra_filters):
        # Queryset of the observations that feed the aggregation, optionally
        # narrowed by additional filter keywords.
        if ref_ts is None:
            return provider_model.objects.filter(
                observed_property=prop_item,
                procedure=process,
                feature_of_interest=item,
                **extra_filters
            )
        return provider_model.objects.filter(
            observed_property=prop_item,
            time_slots=ref_ts,
            procedure=process_calc,
            feature_of_interest=item,
            **extra_filters
        )

    def earliest(queryset):
        # At most one row: the one with the smallest lower range bound.
        return queryset.annotate(
            field_lower=Func(F('phenomenon_time_range'), function='LOWER')
        ).order_by('field_lower')[:1]

    def latest(queryset):
        # At most one row: the one with the largest upper range bound.
        return queryset.annotate(
            field_upper=Func(F('phenomenon_time_range'), function='UPPER')
        ).order_by('-field_upper')[:1]

    # Overall extent of the source data; nothing to do when there is none.
    range_to_limit_observation = latest(source_observations())
    if not range_to_limit_observation:
        return
    range_to_limit = range_to_limit_observation[0].phenomenon_time_range.upper

    range_from_limit_observation = earliest(source_observations())
    if not range_from_limit_observation:
        return
    range_from_limit = range_from_limit_observation[0].phenomenon_time_range.lower

    # Decide since when source observations count as changed. An explicit
    # `aggregate_updated_since` wins; otherwise the newest `updated_at` of the
    # observations already aggregated into `t` is used, if there are any.
    max_updated_at = None
    if aggregate_updated_since:
        max_updated_at = aggregate_updated_since
    else:
        max_updated_at_observation = provider_model.objects.filter(
            observed_property=prop_item,
            procedure=process_calc,
            feature_of_interest=item,
            time_slots=t
        ).order_by('-updated_at')[:1]
        if max_updated_at_observation:
            max_updated_at = max_updated_at_observation[0].updated_at

    from_value = None
    to_value = None
    if max_updated_at:
        # Window spanned by the source observations updated since then.
        from_observation = earliest(
            source_observations(updated_at__gte=max_updated_at))
        if from_observation:
            from_value = from_observation[0].phenomenon_time_range.lower

        to_observation = latest(
            source_observations(updated_at__gte=max_updated_at))
        if to_observation:
            to_value = to_observation[0].phenomenon_time_range.upper
    else:
        # Nothing aggregated yet and no explicit bound: cover all source data.
        from_value = range_from_limit
        to_value = range_to_limit

    if not (from_value and to_value and to_value > from_value):
        return

    from_value = from_value.astimezone(UTC_P0100)
    to_value = to_value.astimezone(UTC_P0100)

    # The overall range limits are not passed on to generate_intervals().
    result_slots = generate_intervals(
        timeslots=t,
        from_datetime=from_value,
        to_datetime=to_value
    )

    for slot in result_slots:
        observations = source_observations(
            phenomenon_time_range__contained_by=slot)

        # The original code iterated the queryset here to collect ids it never
        # used; iterating evaluates the queryset before aggregate_observations()
        # runs. NOTE(review): the early evaluation is preserved on purpose -
        # confirm whether it is relied upon before removing it.
        for _ in observations:
            pass

        aggregate_observations(
            observations,
            provider_model,
            prop_item,
            slot,
            item,
            process_calc,
            t
        )