def test_compute_temperature_features_no_meter_data_tz(
    il_electricity_cdd_hdd_billing_monthly,
):
    """A meter index with its timezone removed is expected to raise ValueError."""
    fixture = il_electricity_cdd_hdd_billing_monthly
    meter_data = fixture["meter_data"]
    temperature_data = fixture["temperature_data"]

    # Strip the timezone and re-attach the index to the fixture's meter data
    # (this assignment happens in place on the object the fixture returned).
    naive_index = meter_data.index.tz_localize(None)
    meter_data.index = naive_index

    with pytest.raises(ValueError):
        compute_temperature_features(meter_data.index, temperature_data)
def test_compute_temperature_features_no_temp_data_tz(
    il_electricity_cdd_hdd_billing_monthly,
):
    """Temperature data with its timezone removed is expected to raise ValueError."""
    fixture = il_electricity_cdd_hdd_billing_monthly
    meter_data = fixture["meter_data"]
    # ``tz_localize(None)`` returns a new object; the fixture's own
    # temperature data is not reassigned here.
    naive_temperature = fixture["temperature_data"].tz_localize(None)

    with pytest.raises(ValueError):
        compute_temperature_features(meter_data.index, naive_temperature)
def test_compute_temperature_features_no_freq_index(
    il_electricity_cdd_hdd_billing_monthly,
):
    """Temperature data whose index ``freq`` is None is expected to raise ValueError."""
    fixture = il_electricity_cdd_hdd_billing_monthly
    meter_data = fixture["meter_data"]
    temps = fixture["temperature_data"]

    # NOTE: this clears ``freq`` on the index object attached to the
    # fixture's temperature data (an in-place attribute assignment).
    temps.index.freq = None

    with pytest.raises(ValueError):
        compute_temperature_features(meter_data.index, temps)
def test_compute_temperature_features_daily_bad_degree_days(
    il_electricity_cdd_hdd_daily,
):
    """Passing ``degree_day_method="UNKNOWN"`` with the daily fixture must raise ValueError."""
    meter_data = il_electricity_cdd_hdd_daily["meter_data"]
    temps = il_electricity_cdd_hdd_daily["temperature_data"]

    # Balance points are supplied so that degree-day options are in play;
    # only the method name is the value this test expects to be rejected.
    bad_options = {
        "heating_balance_points": [60, 61],
        "cooling_balance_points": [65, 66],
        "degree_day_method": "UNKNOWN",
    }

    with pytest.raises(ValueError):
        compute_temperature_features(meter_data.index, temps, **bad_options)
def test_compute_temperature_features_with_duplicated_index(
    il_electricity_cdd_hdd_billing_monthly,
):
    """Duplicate meter timestamps must raise ValueError with a specific message.

    The meter data is doubled so every timestamp appears twice, and the
    temperature data is truncated; the test then checks the exact error
    text raised by ``compute_temperature_features``.
    """
    meter_data = il_electricity_cdd_hdd_billing_monthly["meter_data"]
    temperature_data = il_electricity_cdd_hdd_billing_monthly["temperature_data"]

    # these are specifically formed to give a less readable error if
    # duplicates are not caught
    #
    # ``DataFrame.append`` was deprecated in pandas 1.4 and removed in 2.0;
    # ``pd.concat`` produces the same doubled frame on old and new pandas.
    meter_data = pd.concat([meter_data, meter_data]).sort_index()
    temperature_data = temperature_data.iloc[8000:]

    with pytest.raises(ValueError) as excinfo:
        compute_temperature_features(meter_data.index, temperature_data)

    # Must run after the ``with`` block: code following the raising call
    # inside the block would never execute.
    assert str(excinfo.value) == "Duplicates found in input meter trace index."
def test_compute_temperature_features_hourly_bad_degree_days(
    il_electricity_cdd_hdd_hourly,
):
    """Passing ``degree_day_method="UNKNOWN"`` with the hourly fixture must raise ValueError."""
    # pick a slice with both hdd and cdd
    window = slice("2016-03-01", "2016-07-01")
    meter_data = il_electricity_cdd_hdd_hourly["meter_data"][window]
    temps = il_electricity_cdd_hdd_hourly["temperature_data"][window]

    bad_options = {
        "heating_balance_points": [60, 61],
        "cooling_balance_points": [65, 66],
        "degree_day_method": "UNKNOWN",
    }

    with pytest.raises(ValueError):
        compute_temperature_features(meter_data.index, temps, **bad_options)
def test_compute_temperature_features_billing_bimonthly_hourly_degree_days(
    il_electricity_cdd_hdd_billing_bimonthly,
):
    """Check hourly-method degree-day features on the bimonthly billing fixture."""
    fixture = il_electricity_cdd_hdd_billing_bimonthly
    meter_data = fixture["meter_data"]
    temps = fixture["temperature_data"]

    df = compute_temperature_features(
        meter_data.index,
        temps,
        heating_balance_points=[60, 61],
        cooling_balance_points=[65, 66],
        temperature_mean=False,
        degree_day_method="hourly",
    )

    assert df.shape == (14, 6)
    assert sorted(df.columns) == [
        "cdd_65",
        "cdd_66",
        "hdd_60",
        "hdd_61",
        "n_hours_dropped",
        "n_hours_kept",
    ]

    # Column means, rounded to two decimals, checked in a fixed order.
    expected_means = (
        ("hdd_60", 13.08),
        ("hdd_61", 13.69),
        ("cdd_65", 3.78),
        ("cdd_66", 3.46),
        ("n_hours_kept", 1386.93),
        ("n_hours_dropped", 0),
    )
    for column, expected in expected_means:
        assert round(df[column].mean(), 2) == expected
def test_compute_temperature_features_billing_monthly_hourly_degree_days_use_mean_false(
    il_electricity_cdd_hdd_billing_monthly,
):
    """Check hourly-method degree days on monthly billing data with ``use_mean_daily_values=False``."""
    fixture = il_electricity_cdd_hdd_billing_monthly
    meter_data = fixture["meter_data"]
    temps = fixture["temperature_data"]

    df = compute_temperature_features(
        meter_data.index,
        temps,
        heating_balance_points=[60, 61],
        cooling_balance_points=[65, 66],
        temperature_mean=False,
        degree_day_method="hourly",
        use_mean_daily_values=False,
    )

    assert df.shape == (27, 6)
    assert sorted(df.columns) == [
        "cdd_65",
        "cdd_66",
        "hdd_60",
        "hdd_61",
        "n_hours_dropped",
        "n_hours_kept",
    ]

    # Column means, rounded to two decimals, checked in a fixed order.
    expected_means = (
        ("hdd_60", 343.01),
        ("hdd_61", 360.19),
        ("cdd_65", 121.29),
        ("cdd_66", 110.83),
        ("n_hours_kept", 719.15),
        ("n_hours_dropped", 0),
    )
    for column, expected in expected_means:
        assert round(df[column].mean(), 2) == expected
def test_compute_temperature_features_billing_monthly_daily_degree_days(
    il_electricity_cdd_hdd_billing_monthly,
):
    """Check daily-method degree-day features on the monthly billing fixture."""
    fixture = il_electricity_cdd_hdd_billing_monthly
    meter_data = fixture["meter_data"]
    temps = fixture["temperature_data"]

    df = compute_temperature_features(
        meter_data.index,
        temps,
        heating_balance_points=[60, 61],
        cooling_balance_points=[65, 66],
        temperature_mean=False,
        degree_day_method="daily",
    )

    assert df.shape == (27, 6)
    assert sorted(df.columns) == [
        "cdd_65",
        "cdd_66",
        "hdd_60",
        "hdd_61",
        "n_days_dropped",
        "n_days_kept",
    ]

    # Column means, rounded to two decimals, checked in a fixed order.
    expected_means = (
        ("hdd_60", 11.42),
        ("hdd_61", 12.0),
        ("cdd_65", 3.54),
        ("cdd_66", 3.19),
        ("n_days_kept", 29.96),
        ("n_days_dropped", 0.04),
    )
    for column, expected in expected_means:
        assert round(df[column].mean(), 2) == expected
def create_caltrack_billing_design_matrix(meter_data, temperature_data):
    """Assemble a design matrix for use with CalTRACK Billing methods.

    Combines the usage-per-day feature with temperature features computed
    for every heating and cooling balance point from 30 through 90.

    Parameters
    ----------
    meter_data : :any:`pandas.DataFrame`
        Meter data in eemeter format; its index is the index the
        temperature features are computed against.
    temperature_data : :any:`pandas.Series`
        Hourly temperature data in eemeter format.

    Returns
    -------
    design_matrix : :any:`pandas.DataFrame`
        A design matrix with mean usage_per_day, hdd_30-hdd_90, and
        cdd_30-cdd_90 features.
    """
    usage = compute_usage_per_day_feature(meter_data, series_name="meter_value")

    balance_points = range(30, 91)
    # limit temperature data matching to periods of up to 35 days.
    matching_tolerance = pd.Timedelta("35D")
    temperature = compute_temperature_features(
        meter_data.index,
        temperature_data,
        heating_balance_points=balance_points,
        cooling_balance_points=balance_points,
        data_quality=True,
        tolerance=matching_tolerance,
    )

    return merge_features([usage, temperature])
def create_caltrack_hourly_preliminary_design_matrix(meter_data, temperature_data):
    """Assemble the input for the first step of building a CalTRACK hourly model.

    Merges the raw meter values with hourly degree-day features (heating
    balance point 50, cooling balance point 65) and an hour-of-week time
    feature.

    Parameters
    ----------
    meter_data : :any:`pandas.DataFrame`
        Hourly meter data in eemeter format.
    temperature_data : :any:`pandas.Series`
        Hourly temperature data in eemeter format.

    Returns
    -------
    design_matrix : :any:`pandas.DataFrame`
        A design matrix with meter_value, hour_of_week, hdd_50, and cdd_65
        features.
    """
    meter_index = meter_data.index

    # Only the hour-of-week feature is requested; the other time features
    # are switched off explicitly.
    hour_of_week_features = compute_time_features(
        meter_index, hour_of_week=True, hour_of_day=False, day_of_week=False
    )
    degree_day_features = compute_temperature_features(
        meter_index,
        temperature_data,
        heating_balance_points=[50],
        cooling_balance_points=[65],
        degree_day_method="hourly",
    )

    meter_values = meter_data.value.to_frame("meter_value")
    return merge_features([meter_values, degree_day_features, hour_of_week_features])
def test_compute_temperature_features_hourly_hourly_degree_days(
    il_electricity_cdd_hdd_hourly, snapshot
):
    """Snapshot-check hourly-method degree-day features on a slice of hourly data."""
    # pick a slice with both hdd and cdd
    window = slice("2016-03-01", "2016-07-01")
    meter_data = il_electricity_cdd_hdd_hourly["meter_data"][window]
    temps = il_electricity_cdd_hdd_hourly["temperature_data"][window]

    df = compute_temperature_features(
        meter_data.index,
        temps,
        heating_balance_points=[60, 61],
        cooling_balance_points=[65, 66],
        temperature_mean=False,
        degree_day_method="hourly",
    )

    assert sorted(df.columns) == [
        "cdd_65",
        "cdd_66",
        "hdd_60",
        "hdd_61",
        "n_hours_dropped",
        "n_hours_kept",
    ]
    assert df.shape == (2952, 6)

    # The snapshot stores the rounded column means in exactly this order.
    snapshot_columns = (
        "hdd_60",
        "hdd_61",
        "cdd_65",
        "cdd_66",
        "n_hours_kept",
        "n_hours_dropped",
    )
    rounded_means = [round(df[column].mean(), 2) for column in snapshot_columns]
    snapshot.assert_match(rounded_means, "values")
def test_compute_temperature_features_billing_bimonthly_daily_degree_days(
    il_electricity_cdd_hdd_billing_bimonthly, snapshot
):
    """Snapshot-check daily-method degree-day features on the bimonthly billing fixture."""
    fixture = il_electricity_cdd_hdd_billing_bimonthly
    meter_data = fixture["meter_data"]
    temps = fixture["temperature_data"]

    df = compute_temperature_features(
        meter_data.index,
        temps,
        heating_balance_points=[60, 61],
        cooling_balance_points=[65, 66],
        temperature_mean=False,
        degree_day_method="daily",
    )

    assert df.shape == (14, 6)
    assert sorted(df.columns) == [
        "cdd_65",
        "cdd_66",
        "hdd_60",
        "hdd_61",
        "n_days_dropped",
        "n_days_kept",
    ]

    # The snapshot stores the rounded column means in exactly this order.
    snapshot_columns = (
        "hdd_60",
        "hdd_61",
        "cdd_65",
        "cdd_66",
        "n_days_kept",
        "n_days_dropped",
    )
    rounded_means = [round(df[column].mean(), 2) for column in snapshot_columns]
    snapshot.assert_match(rounded_means, "values")
def create_caltrack_daily_design_matrix(meter_data, temperature_data):
    """Assemble a usage-per-day plus temperature-feature design matrix.

    Temperature features are computed against ``meter_data.index`` for
    every heating and cooling balance point from 30 through 90, with
    ``data_quality=True``; no ``tolerance`` is passed.

    Parameters
    ----------
    meter_data
        Meter data passed to ``compute_usage_per_day_feature``; its index
        drives the temperature features.
    temperature_data
        Temperature data passed to ``compute_temperature_features``.

    Returns
    -------
    design_matrix
        The result of merging the usage-per-day and temperature features.
    """
    usage = compute_usage_per_day_feature(meter_data, series_name="meter_value")

    balance_points = range(30, 91)
    temperature = compute_temperature_features(
        meter_data.index,
        temperature_data,
        heating_balance_points=balance_points,
        cooling_balance_points=balance_points,
        data_quality=True,
    )

    return merge_features([usage, temperature])
def test_compute_temperature_features_billing_bimonthly_temp_mean(
    il_electricity_cdd_hdd_billing_bimonthly,
):
    """Check the default (no extra options) features on the bimonthly billing fixture."""
    fixture = il_electricity_cdd_hdd_billing_bimonthly
    meter_data = fixture["meter_data"]
    temps = fixture["temperature_data"]

    df = compute_temperature_features(meter_data.index, temps)

    assert df.shape == (14, 3)
    assert sorted(df.columns) == [
        "n_days_dropped",
        "n_days_kept",
        "temperature_mean",
    ]

    # Mean of the per-period mean temperature, rounded to the nearest integer.
    overall_mean = df.temperature_mean.mean()
    assert round(overall_mean) == 55.0
def occupancy_precursor(il_electricity_cdd_hdd_hourly):
    """Build merged meter, temperature, and time features from the hourly fixture.

    Presumably registered as a pytest fixture; the decorator is not visible
    in this part of the file.

    Returns the result of ``merge_features`` over the meter values (as a
    ``meter_value`` column), hourly degree-day features (heating balance
    point 50, cooling balance point 65), and the default time features.
    """
    meter_data = il_electricity_cdd_hdd_hourly["meter_data"]
    temperature_data = il_electricity_cdd_hdd_hourly["temperature_data"]
    meter_index = meter_data.index

    time_part = compute_time_features(meter_index)
    temperature_part = compute_temperature_features(
        meter_index,
        temperature_data,
        heating_balance_points=[50],
        cooling_balance_points=[65],
        degree_day_method="hourly",
    )
    meter_part = meter_data.value.to_frame("meter_value")

    return merge_features([meter_part, temperature_part, time_part])
def create_caltrack_billing_design_matrix(meter_data, temperature_data):
    """Assemble a usage-per-day plus temperature-feature design matrix for billing data.

    Temperature features are computed against ``meter_data.index`` for
    every heating and cooling balance point from 30 through 90, with
    ``data_quality=True`` and a 35-day matching tolerance.

    Parameters
    ----------
    meter_data
        Meter data passed to ``compute_usage_per_day_feature``; its index
        drives the temperature features.
    temperature_data
        Temperature data passed to ``compute_temperature_features``.

    Returns
    -------
    design_matrix
        The result of merging the usage-per-day and temperature features.
    """
    usage = compute_usage_per_day_feature(meter_data, series_name="meter_value")

    balance_points = range(30, 91)
    # limit temperature data matching to periods of up to 35 days.
    matching_tolerance = pd.Timedelta("35D")
    temperature = compute_temperature_features(
        meter_data.index,
        temperature_data,
        heating_balance_points=balance_points,
        cooling_balance_points=balance_points,
        data_quality=True,
        tolerance=matching_tolerance,
    )

    return merge_features([usage, temperature])
def test_compute_temperature_features_shorter_temperature_data(
    il_electricity_cdd_hdd_daily,
):
    """Check features when the temperature data stops before the meter data does."""
    meter_data = il_electricity_cdd_hdd_daily["meter_data"]
    # drop some data: the final 200 temperature readings are removed
    truncated_temps = il_electricity_cdd_hdd_daily["temperature_data"][:-200]

    df = compute_temperature_features(meter_data.index, truncated_temps)

    assert df.shape == (810, 3)
    assert sorted(df.columns) == [
        "n_days_dropped",
        "n_days_kept",
        "temperature_mean",
    ]

    total_temperature_mean = df.temperature_mean.sum()
    assert round(total_temperature_mean) == 43958.0
def test_compute_temperature_features_hourly_temp_mean(
    il_electricity_cdd_hdd_hourly,
):
    """Check the default (no extra options) features on a slice of hourly data."""
    # pick a slice with both hdd and cdd
    window = slice("2016-03-01", "2016-07-01")
    meter_data = il_electricity_cdd_hdd_hourly["meter_data"][window]
    temps = il_electricity_cdd_hdd_hourly["temperature_data"][window]

    df = compute_temperature_features(meter_data.index, temps)

    assert sorted(df.columns) == [
        "n_hours_dropped",
        "n_hours_kept",
        "temperature_mean",
    ]
    assert df.shape == (2952, 3)

    overall_mean = df.temperature_mean.mean()
    assert round(overall_mean) == 62.0
def test_compute_temperature_features_daily_data_quality(
    il_electricity_cdd_hdd_daily,
):
    """Check the ``data_quality=True`` columns on the daily fixture."""
    meter_data = il_electricity_cdd_hdd_daily["meter_data"]
    temps = il_electricity_cdd_hdd_daily["temperature_data"]

    # The mean temperature column is switched off so only the count and
    # data-quality columns are expected below.
    df = compute_temperature_features(
        meter_data.index,
        temps,
        temperature_mean=False,
        data_quality=True,
    )

    assert df.shape == (810, 4)
    assert sorted(df.columns) == [
        "n_days_dropped",
        "n_days_kept",
        "temperature_not_null",
        "temperature_null",
    ]

    not_null_mean = df.temperature_not_null.mean()
    null_mean = df.temperature_null.mean()
    assert round(not_null_mean, 2) == 23.99
    assert round(null_mean, 2) == 0.00
def create_caltrack_hourly_preliminary_design_matrix(meter_data, temperature_data):
    """Merge meter values, hourly degree-day features, and an hour-of-week feature.

    Parameters
    ----------
    meter_data
        Meter data exposing a ``value`` column; its index drives both the
        time and temperature features.
    temperature_data
        Temperature data passed to ``compute_temperature_features``.

    Returns
    -------
    design_matrix
        The result of ``merge_features`` over the meter values (as
        ``meter_value``), the temperature features (heating balance point
        50, cooling balance point 65, hourly method), and the time features.
    """
    meter_index = meter_data.index

    # Only the hour-of-week feature is requested; the other time features
    # are switched off explicitly.
    hour_of_week_features = compute_time_features(
        meter_index, hour_of_week=True, hour_of_day=False, day_of_week=False
    )
    degree_day_features = compute_temperature_features(
        meter_index,
        temperature_data,
        heating_balance_points=[50],
        cooling_balance_points=[65],
        degree_day_method="hourly",
    )

    meter_values = meter_data.value.to_frame("meter_value")
    return merge_features([meter_values, degree_day_features, hour_of_week_features])
def test_compute_temperature_features_shorter_meter_data(
    il_electricity_cdd_hdd_daily,
):
    """Check features when the meter data stops before the temperature data does."""
    temps = il_electricity_cdd_hdd_daily["temperature_data"]
    # drop some data: the final 10 meter rows are removed
    truncated_meter = il_electricity_cdd_hdd_daily["meter_data"][:-10]

    df = compute_temperature_features(truncated_meter.index, temps)

    assert df.shape == (800, 3)
    assert sorted(df.columns) == [
        "n_days_dropped",
        "n_days_kept",
        "temperature_mean",
    ]

    total_temperature_mean = df.temperature_mean.sum()
    assert round(total_temperature_mean) == 43904.0

    # ensure last row is NaN'ed
    final_row = df.iloc[-1]
    assert pd.isnull(final_row.n_days_kept)
def occupancy_precursor_only_nan(il_electricity_cdd_hdd_hourly):
    """Build merged features for a meter window whose final row is NaN.

    Presumably registered as a pytest fixture; the decorator is not visible
    in this part of the file.

    The meter data is restricted to 2017-01-04 through 2017-06-01 and its
    last row is overwritten with NaN before features are computed. The
    temperature data is used unsliced.

    Returns the result of ``merge_features`` over the meter values (as a
    ``meter_value`` column), hourly degree-day features (heating balance
    point 50, cooling balance point 65), and the default time features.
    """
    meter_data = il_electricity_cdd_hdd_hourly["meter_data"]
    # Copy the window before writing into it: assigning into a slice of the
    # fixture's frame raises SettingWithCopyWarning and, depending on the
    # pandas version, can write through to the frame the fixture returned.
    meter_data = meter_data[datetime(2017, 1, 4):datetime(2017, 6, 1)].copy()
    # Simulates a segment where there is only a single nan value
    meter_data.iloc[-1] = np.nan

    temperature_data = il_electricity_cdd_hdd_hourly["temperature_data"]
    time_features = compute_time_features(meter_data.index)
    temperature_features = compute_temperature_features(
        meter_data.index,
        temperature_data,
        heating_balance_points=[50],
        cooling_balance_points=[65],
        degree_day_method="hourly",
    )
    return merge_features(
        [
            meter_data.value.to_frame("meter_value"),
            temperature_features,
            time_features,
        ]
    )
def test_compute_temperature_features_empty_temperature_data():
    """Empty temperature data is expected to yield an empty three-column frame."""
    empty_index = pd.DatetimeIndex([], tz="UTC", name="dt", freq="H")
    temperature_data = pd.Series({"value": []}, index=empty_index).astype(float)

    # Stand-in for meter data: a frame built on the daily-resampled index of
    # the (empty) temperature series.
    daily_index = temperature_data.resample("D").sum().index
    meter_data_hack = pd.DataFrame({"value": 0}, index=daily_index)

    df = compute_temperature_features(
        meter_data_hack.index,
        temperature_data,
        heating_balance_points=[65],
        cooling_balance_points=[65],
        degree_day_method="daily",
        use_mean_daily_values=False,
    )

    assert df.shape == (0, 3)
    assert sorted(df.columns) == [
        "n_days_dropped",
        "n_days_kept",
        "temperature_mean",
    ]

    total_temperature_mean = df.temperature_mean.sum()
    assert round(total_temperature_mean) == 0
def test_compute_temperature_features_hourly_data_quality(
    il_electricity_cdd_hdd_hourly,
):
    """Check the ``data_quality=True`` columns on a slice of hourly data."""
    # pick a slice with both hdd and cdd
    window = slice("2016-03-01", "2016-07-01")
    meter_data = il_electricity_cdd_hdd_hourly["meter_data"][window]
    temps = il_electricity_cdd_hdd_hourly["temperature_data"][window]

    # The mean temperature column is switched off so only the count and
    # data-quality columns are expected below.
    df = compute_temperature_features(
        meter_data.index,
        temps,
        temperature_mean=False,
        data_quality=True,
    )

    assert df.shape == (2952, 4)
    assert sorted(df.columns) == [
        "n_hours_dropped",
        "n_hours_kept",
        "temperature_not_null",
        "temperature_null",
    ]

    not_null_mean = df.temperature_not_null.mean()
    null_mean = df.temperature_null.mean()
    assert round(not_null_mean, 2) == 1.0
    assert round(null_mean, 2) == 0.0