def test_isd_weather_source(periods, isd_weather_source):
    """Exercise ISDWeatherSource temperature and degree-day queries.

    Covers list-of-periods calls as well as the single-period forms
    (which internally must catch the iterable TypeError).
    """
    ws = ISDWeatherSource(*isd_weather_source)

    avg_temps = ws.average_temperature(periods, "degF")
    assert_allclose(avg_temps, [66.576, 68.047, 74.697], rtol=RTOL, atol=ATOL)

    hdds = ws.hdd(periods, "degF", 65)
    assert_allclose(hdds, [0.61, 17.1, 0.000], rtol=RTOL, atol=ATOL)

    cdds = ws.cdd(periods, "degF", 65)
    assert_allclose(cdds, [42.06, 107.0925, 292.46837], rtol=RTOL, atol=ATOL)

    hourly_temps = ws.hourly_temperatures(periods, "degF")
    assert_allclose(hourly_temps[0][:5], [69.98, 66.92, 64.04, 62.96, 62.96],
                    rtol=RTOL, atol=ATOL)

    hourly_temps = ws.hourly_temperatures(periods[0], "degF")
    assert_allclose(hourly_temps[:5], [69.98, 66.92, 64.04, 62.96, 62.96],
                    rtol=RTOL, atol=ATOL)

    # test single period case (is iterable type error caught?)
    # BUG FIX: the original called daily_temperatures twice back-to-back with
    # an identical duplicated comment (copy-paste); the second result was
    # unused. The redundant call has been removed.
    daily_temps = ws.daily_temperatures(periods[0], "degF")
    assert_allclose(daily_temps[:3], [66.466, 66.098, 66.685], rtol=RTOL, atol=ATOL)

    avg_temp = ws.average_temperature(periods[0], "degF")
    assert_allclose(avg_temp, 66.576, rtol=RTOL, atol=ATOL)
def test_isd_weather_source(consumption_history_one_summer_electricity,
                            isd_weather_source):
    """Exercise ISDWeatherSource queries driven by consumption records.

    Covers list-of-consumptions calls as well as the single-consumption
    forms (which internally must catch the iterable TypeError).
    """
    ws = ISDWeatherSource(*isd_weather_source)
    consumptions = consumption_history_one_summer_electricity.get("electricity")

    avg_temps = ws.average_temperature(consumptions, "degF")
    assert_allclose(avg_temps, [66.576, 68.047, 74.697], rtol=RTOL, atol=ATOL)

    hdds = ws.hdd(consumptions, "degF", 65)
    assert_allclose(hdds, [0.945, 24.517, 0.000], rtol=RTOL, atol=ATOL)

    cdds = ws.cdd(consumptions, "degF", 65)
    assert_allclose(cdds, [42.06, 107.0925, 292.46837], rtol=RTOL, atol=ATOL)

    hourly_temps = ws.hourly_temperatures(consumptions, "degF")
    assert_allclose(hourly_temps[0][:5], [69.98, 66.92, 64.04, 62.96, 62.96],
                    rtol=RTOL, atol=ATOL)

    hourly_temps = ws.hourly_temperatures(consumptions[0], "degF")
    assert_allclose(hourly_temps[:5], [69.98, 66.92, 64.04, 62.96, 62.96],
                    rtol=RTOL, atol=ATOL)

    # test single consumption case (is iterable type error caught?)
    # BUG FIX: the original duplicated this daily_temperatures call (and its
    # comment) verbatim; the second result was never used. Removed.
    daily_temps = ws.daily_temperatures(consumptions[0], "degF")
    assert_allclose(daily_temps[:3], [66.466, 66.098, 66.685], rtol=RTOL, atol=ATOL)

    avg_temp = ws.average_temperature(consumptions[0], "degF")
    assert_allclose(avg_temp, 66.576, rtol=RTOL, atol=ATOL)
def test_isd_weather_source(consumption_history_one_summer_electricity,
                            isd_weather_source):
    """Check average temperatures and degree-day totals for one summer of usage."""
    ws = ISDWeatherSource(*isd_weather_source)
    consumptions = consumption_history_one_summer_electricity.get("electricity")

    checks = [
        (ws.average_temperature(consumptions, "degF"), [66.576, 68.047, 74.697]),
        (ws.hdd(consumptions, "degF", 65), [0.294, 20.309, 0.0]),
        (ws.cdd(consumptions, "degF", 65), [47.603, 113.775, 300.722]),
    ]
    for actual, expected in checks:
        assert_allclose(actual, expected, rtol=RTOL, atol=ATOL)
def test_isd_weather_source(consumption_history_one_summer_electricity,
                            isd_weather_source):
    """Spot-check get_* queries against high-precision reference values."""
    ws = ISDWeatherSource(*isd_weather_source)
    consumptions = consumption_history_one_summer_electricity.get("electricity")

    avg_temps = ws.get_average_temperature(consumptions, "degF")
    expected_avgs = [66.576956521739135, 68.047780898876411, 74.697162921348323]
    for actual, expected in zip(avg_temps, expected_avgs):
        assert abs(actual - expected) < EPSILON

    hdds = ws.get_hdd(consumptions, "degF", 65)
    expected_hdds = [0.29478220869567906, 20.309999600000033, 0.0]
    for actual, expected in zip(hdds, expected_hdds):
        assert abs(actual - expected) < EPSILON

    cdds = ws.get_cdd(consumptions, "degF", 65)
    expected_cdds = [47.603489860868635, 113.77566417391201, 300.72214678735065]
    for actual, expected in zip(cdds, expected_cdds):
        assert abs(actual - expected) < EPSILON
def test_cache_deletes_old_records():
    """update_cache for an hour must replace every prior record at that hour."""
    ws = ISDWeatherSource("722660", 2012, 2012)
    target_hour = datetime(2012, 1, 1, 0)

    # Sanity check: this station happens to have two records at this
    # particular hour (usually there is only one record per hour).
    n_before = sum(1 for t in ws.get_temperature_set() if t.dt == target_hour)
    assert n_before == 2

    # Overwrite that hour with a single record.
    ws.update_cache([{"temp_C": 0, "dt": target_hour}])

    # The old pair should have been deleted in favor of the new record.
    n_after = sum(1 for t in ws.get_temperature_set() if t.dt == target_hour)
    assert n_after == 1
def test_load_cached(monkeypatch):
    """A fresh weather source should be able to read years cached by another."""
    cache_file = tempfile.NamedTemporaryFile()
    monkeypatch.setenv(
        'EEMETER_WEATHER_CACHE_URL',
        'sqlite:///{}'.format(cache_file.name))

    # First instance populates the cache via the mocked client.
    ws = ISDWeatherSource('722880')
    ws.client = MockWeatherClient()
    assert ws.tempC.empty
    ws.add_year(2015)
    assert not ws.tempC.empty

    # Second instance starts empty but can pull the cached range back in.
    ws = ISDWeatherSource('722880')
    assert ws.tempC.empty
    ws.load_cached(2013, 2017)
    assert not ws.tempC.empty

    cache_file.close()
def test_isd_weather_cache_00():
    """Construct sources year by year, then over the full (now cached) range."""
    global ws_pk

    ws = ISDWeatherSource("722660", 2012, 2012)
    assert len(ws.data) == 8783
    assert len(ws.get_temperature_set().fetchall()) == 11652
    assert ws.weather_station_pk == ws_pk

    ws = ISDWeatherSource("722660", 2013, 2013)
    assert len(ws.data) == 17542
    assert len(ws.get_temperature_set().fetchall()) == 23551
    assert ws.weather_station_pk == ws_pk

    # Both years are cached at this point, so repeat construction over the
    # combined range should be fast.
    for _ in range(2):
        ws = ISDWeatherSource("722660", 2012, 2013)
        assert len(ws.data) == 17542
        assert len(ws.get_temperature_set().fetchall()) == 23551
        assert ws.weather_station_pk == ws_pk
def mock_isd_weather_source():
    """Return an ISD weather source backed by a throwaway cache directory
    and a mocked weather client (no network access)."""
    cache_dir = tempfile.mkdtemp()
    source = ISDWeatherSource("722880", cache_dir)
    source.client = MockWeatherClient()
    return source
def test_not_mocked():
    """Smoke test against the real (non-mocked) weather client."""
    source = ISDWeatherSource('722880')
    source.add_year_range(2011, 2011)
def test_bad_isd_station():
    """An unknown station identifier must be rejected at construction time."""
    with pytest.raises(ValueError):
        ISDWeatherSource("INVALID")
def get_single_thermostat(thermostat_id, zipcode, equipment_type,
                          utc_offset, interval_data_filename):
    """ Load a single thermostat directly from an interval data file.

    Parameters
    ----------
    thermostat_id : str
        A unique identifier for the thermostat.
    zipcode : str
        The zipcode of the thermostat, e.g. `"01234"`.
    equipment_type : str
        The equipment type of the thermostat.
    utc_offset : str
        A string representing the UTC offset of the interval data, e.g.
        `"-0700"`. Could also be `"Z"` (UTC), or just `"+7"` (equivalent to
        `"+0700"`), or any other timezone format recognized by the library
        method dateutil.parser.parse.
    interval_data_filename : str
        The path to the CSV in which the interval data is stored.

    Returns
    -------
    thermostat : thermostat.Thermostat
        The loaded thermostat object.

    Raises
    ------
    ValueError
        If the dates in the file are out of order, missing, or duplicated,
        or if no weather station can be found for the ZIP code.
    """
    df = pd.read_csv(interval_data_filename)

    heating, cooling, aux_emerg = _get_equipment_type(equipment_type)

    # load indices: one daily row per CSV row, 24 hourly samples per day
    dates = pd.to_datetime(df["date"])
    daily_index = pd.DatetimeIndex(start=dates[0], periods=dates.shape[0],
                                   freq="D")
    hourly_index = pd.DatetimeIndex(start=dates[0],
                                    periods=dates.shape[0] * 24, freq="H")
    hourly_index_utc = pd.DatetimeIndex(start=dates[0],
                                        periods=dates.shape[0] * 24,
                                        freq="H", tz=pytz.UTC)

    # raise an error if dates are not aligned
    if not all(dates == daily_index):
        # BUG FIX: `message` was previously *called* as a function before
        # being assigned, which raised NameError instead of the intended
        # ValueError with a helpful message.
        message = ("Dates provided for thermostat_id={} may contain some "
                   "which are out of order, missing, or duplicated.".format(
                       thermostat_id))
        raise ValueError(message)

    # load hourly time series values
    temp_in = pd.Series(_get_hourly_block(df, "temp_in"), hourly_index)

    if heating:
        heating_setpoint = pd.Series(
            _get_hourly_block(df, "heating_setpoint"), hourly_index)
    else:
        heating_setpoint = None

    if cooling:
        cooling_setpoint = pd.Series(
            _get_hourly_block(df, "cooling_setpoint"), hourly_index)
    else:
        cooling_setpoint = None

    if aux_emerg:
        auxiliary_heat_runtime = pd.Series(
            _get_hourly_block(df, "auxiliary_heat_runtime"), hourly_index)
        emergency_heat_runtime = pd.Series(
            _get_hourly_block(df, "emergency_heat_runtime"), hourly_index)
    else:
        auxiliary_heat_runtime = None
        emergency_heat_runtime = None

    # load outdoor temperatures from the station mapped to this ZIP code
    station = zipcode_to_usaf_station(zipcode)
    if station is None:
        message = "Could not locate a valid source of outdoor temperature " \
                  "data for ZIP code {}".format(zipcode)
        raise ValueError(message)

    ws_hourly = ISDWeatherSource(station)
    # Convert the textual offset ("-0700", "Z", "+7", ...) to a timedelta by
    # parsing it attached to an arbitrary fixed date.
    utc_offset = dateutil.parser.parse(
        "2000-01-01T00:00:00" + utc_offset).tzinfo.utcoffset(None)
    temp_out = ws_hourly.indexed_temperatures(
        hourly_index_utc - utc_offset, "degF")
    # Re-label with the local (naive) hourly index to align with temp_in.
    temp_out.index = hourly_index

    # load daily time series values
    if cooling:
        cool_runtime = pd.Series(df["cool_runtime"].values, daily_index)
    else:
        cool_runtime = None
    if heating:
        heat_runtime = pd.Series(df["heat_runtime"].values, daily_index)
    else:
        heat_runtime = None

    # create thermostat instance
    thermostat = Thermostat(
        thermostat_id, equipment_type, zipcode, station, temp_in, temp_out,
        cooling_setpoint, heating_setpoint, cool_runtime, heat_runtime,
        auxiliary_heat_runtime, emergency_heat_runtime)
    return thermostat
def test_isd_weather_cache_01():
    """Cache contents persist: a one-year construction sees the full cached set."""
    global ws_pk
    ws = ISDWeatherSource("722660", 2012, 2012)
    assert len(ws.data) == 17542
    assert len(ws.get_temperature_set().fetchall()) == 23551
    assert ws.weather_station_pk == ws_pk
def setUpTestData(cls):
    """Build shared fixtures: a test user, a synthetic project with six
    energy traces, and mocked weather sources (normal + observed)."""
    cls.user = User.objects.create_user('john', '*****@*****.**', 'johnpassword')
    cls.project = create_project(spec={
        "project_id": "ABCD",
        "project_owner": cls.user.projectowner,
        "baseline_period_end": datetime(2012, 1, 1, tzinfo=pytz.UTC),
        "reporting_period_start": datetime(2012, 2, 1, tzinfo=pytz.UTC),
        "zipcode": "91104",
        # Six traces at monthly (MS), daily (D), 15-minute (15T), hourly (H),
        # and 30-minute (30T) frequencies, each with periodic NaN gaps and
        # periodic estimated-reading flags.
        "traces": [
            {
                "interpretation": "NG_C_S",
                "unit": "THM",
                "start": "2010-01-01",
                "end": "2014-12-31",
                "freq": "MS",
                "value": 1,
                "nans": set(range(0, 60, 20)),
                "estimated": set(range(3, 60, 15)),
            },
            {
                "interpretation": "NG_C_S",
                "unit": "THM",
                "start": "2011-09-01",
                "end": "2014-12-31",
                "freq": "D",
                "value": 2,
                "nans": set(range(0, 1000, 20)),
                "estimated": set(range(3, 1000, 15)),
            },
            {
                "interpretation": "E_C_S",
                "unit": "KWH",
                "start": "2011-01-01",
                "end": "2014-12-31",
                "freq": "15T",
                "value": 0.04,
                "nans": set(range(0, 96*365*4, 200)),
                "estimated": set(range(3, 96*365*4, 150)),
            },
            {
                "interpretation": "E_C_S",
                "unit": "KWH",
                "start": "2011-01-01",
                "end": "2014-12-31",
                "freq": "H",
                "value": 0.4,
                "nans": set(range(0, 96*365*4, 200)),
                "estimated": set(range(3, 96*365*4, 150)),
            },
            {
                "interpretation": "E_OSG_U",
                "unit": "KWH",
                "start": "2012-01-15",
                "end": "2014-12-31",
                "freq": "H",
                "value": 0.3,
                "nans": set(range(0, 96*365*4, 200)),
                "estimated": set(range(3, 96*365*4, 150)),
            },
            {
                "interpretation": "E_OSG_U",
                "unit": "KWH",
                "start": "2010-01-01",
                "end": "2014-12-31",
                "freq": "30T",
                "value": 0.1,
                "nans": set(range(0, 96*365*4, 200)),
                "estimated": set(range(3, 96*365*4, 150)),
            },
        ],
    })
    cls.project.run_meter()
    # Weather-normal (TMY3) source: temp cache dir, mocked client; data is
    # loaded explicitly because preload=False.
    tmp_dir = tempfile.mkdtemp()
    wns = TMY3WeatherSource("724838", tmp_dir, preload=False)
    wns.client = MockWeatherClient()
    wns._load_data()
    cls.weather_normal_source = wns
    # Observed (ISD) weather source, also backed by a mocked client.
    tmp_dir = tempfile.mkdtemp()
    ws = ISDWeatherSource("722880", tmp_dir)
    ws.client = MockWeatherClient()
    cls.weather_source = ws
def setUpTestData(cls):
    """Build shared fixtures: a test user, a synthetic project with six
    energy traces, and mocked weather sources (normal + observed)."""
    cls.user = User.objects.create_user('john', '*****@*****.**', 'johnpassword')
    cls.project = create_project(
        spec={
            "project_id": "ABCD",
            "project_owner": cls.user.projectowner,
            "baseline_period_end": datetime(2012, 1, 1, tzinfo=pytz.UTC),
            "reporting_period_start": datetime(2012, 2, 1, tzinfo=pytz.UTC),
            "zipcode": "91104",
            # Six traces at monthly (MS), daily (D), 15-minute (15T), hourly
            # (H), and 30-minute (30T) frequencies, each with periodic NaN
            # gaps and periodic estimated-reading flags.
            "traces": [
                {
                    "interpretation": "NG_C_S",
                    "unit": "THM",
                    "start": "2010-01-01",
                    "end": "2014-12-31",
                    "freq": "MS",
                    "value": 1,
                    "nans": set(range(0, 60, 20)),
                    "estimated": set(range(3, 60, 15)),
                },
                {
                    "interpretation": "NG_C_S",
                    "unit": "THM",
                    "start": "2011-09-01",
                    "end": "2014-12-31",
                    "freq": "D",
                    "value": 2,
                    "nans": set(range(0, 1000, 20)),
                    "estimated": set(range(3, 1000, 15)),
                },
                {
                    "interpretation": "E_C_S",
                    "unit": "KWH",
                    "start": "2011-01-01",
                    "end": "2014-12-31",
                    "freq": "15T",
                    "value": 0.04,
                    "nans": set(range(0, 96 * 365 * 4, 200)),
                    "estimated": set(range(3, 96 * 365 * 4, 150)),
                },
                {
                    "interpretation": "E_C_S",
                    "unit": "KWH",
                    "start": "2011-01-01",
                    "end": "2014-12-31",
                    "freq": "H",
                    "value": 0.4,
                    "nans": set(range(0, 96 * 365 * 4, 200)),
                    "estimated": set(range(3, 96 * 365 * 4, 150)),
                },
                {
                    "interpretation": "E_OSG_U",
                    "unit": "KWH",
                    "start": "2012-01-15",
                    "end": "2014-12-31",
                    "freq": "H",
                    "value": 0.3,
                    "nans": set(range(0, 96 * 365 * 4, 200)),
                    "estimated": set(range(3, 96 * 365 * 4, 150)),
                },
                {
                    "interpretation": "E_OSG_U",
                    "unit": "KWH",
                    "start": "2010-01-01",
                    "end": "2014-12-31",
                    "freq": "30T",
                    "value": 0.1,
                    "nans": set(range(0, 96 * 365 * 4, 200)),
                    "estimated": set(range(3, 96 * 365 * 4, 150)),
                },
            ],
        })
    cls.project.run_meter()
    # Weather-normal (TMY3) source: temp cache dir, mocked client; data is
    # loaded explicitly because preload=False.
    tmp_dir = tempfile.mkdtemp()
    wns = TMY3WeatherSource("724838", tmp_dir, preload=False)
    wns.client = MockWeatherClient()
    wns._load_data()
    cls.weather_normal_source = wns
    # Observed (ISD) weather source, also backed by a mocked client.
    tmp_dir = tempfile.mkdtemp()
    ws = ISDWeatherSource("722880", tmp_dir)
    ws.client = MockWeatherClient()
    cls.weather_source = ws
def get_single_thermostat(thermostat_id, zipcode, equipment_type,
                          utc_offset, interval_data_filename):
    """ Load a single thermostat directly from an interval data file.

    Parameters
    ----------
    thermostat_id : str
        A unique identifier for the thermostat.
    zipcode : str
        The zipcode of the thermostat, e.g. `"01234"`.
    equipment_type : str
        The equipment type of the thermostat.
    utc_offset : str
        A string representing the UTC offset of the interval data, e.g.
        `"-0700"`. Could also be `"Z"` (UTC), or just `"+7"` (equivalent to
        `"+0700"`), or any other timezone format recognized by the library
        method dateutil.parser.parse.
    interval_data_filename : str
        The path to the CSV in which the interval data is stored.

    Returns
    -------
    thermostat : thermostat.Thermostat
        The loaded thermostat object.

    Raises
    ------
    ValueError
        If the dates in the file are out of order, missing, or duplicated,
        or if no weather station can be found for the ZIP code.
    """
    df = pd.read_csv(interval_data_filename)

    heating, cooling, aux_emerg = _get_equipment_type(equipment_type)

    # load indices: one daily row per CSV row, 24 hourly samples per day
    dates = pd.to_datetime(df["date"])
    daily_index = pd.DatetimeIndex(start=dates[0], periods=dates.shape[0],
                                   freq="D")
    hourly_index = pd.DatetimeIndex(start=dates[0],
                                    periods=dates.shape[0] * 24, freq="H")
    hourly_index_utc = pd.DatetimeIndex(start=dates[0],
                                        periods=dates.shape[0] * 24,
                                        freq="H", tz=pytz.UTC)

    # raise an error if dates are not aligned
    if not all(dates == daily_index):
        # BUG FIX: `message` was previously *called* as a function before
        # being assigned, which raised NameError instead of the intended
        # ValueError with a helpful message.
        message = ("Dates provided for thermostat_id={} may contain some "
                   "which are out of order, missing, or duplicated."
                   .format(thermostat_id))
        raise ValueError(message)

    # load hourly time series values
    temp_in = pd.Series(_get_hourly_block(df, "temp_in"), hourly_index)

    if heating:
        heating_setpoint = pd.Series(
            _get_hourly_block(df, "heating_setpoint"), hourly_index)
    else:
        heating_setpoint = None

    if cooling:
        cooling_setpoint = pd.Series(
            _get_hourly_block(df, "cooling_setpoint"), hourly_index)
    else:
        cooling_setpoint = None

    if aux_emerg:
        auxiliary_heat_runtime = pd.Series(
            _get_hourly_block(df, "auxiliary_heat_runtime"), hourly_index)
        emergency_heat_runtime = pd.Series(
            _get_hourly_block(df, "emergency_heat_runtime"), hourly_index)
    else:
        auxiliary_heat_runtime = None
        emergency_heat_runtime = None

    # load outdoor temperatures from the station mapped to this ZIP code
    station = zipcode_to_usaf_station(zipcode)
    if station is None:
        message = "Could not locate a valid source of outdoor temperature " \
                  "data for ZIP code {}".format(zipcode)
        raise ValueError(message)

    ws_hourly = ISDWeatherSource(station)
    # Convert the textual offset ("-0700", "Z", "+7", ...) to a timedelta by
    # parsing it attached to an arbitrary fixed date.
    utc_offset = dateutil.parser.parse(
        "2000-01-01T00:00:00" + utc_offset).tzinfo.utcoffset(None)
    temp_out = ws_hourly.indexed_temperatures(
        hourly_index_utc - utc_offset, "degF")
    # Re-label with the local (naive) hourly index to align with temp_in.
    temp_out.index = hourly_index

    # load daily time series values
    if cooling:
        cool_runtime = pd.Series(df["cool_runtime"].values, daily_index)
    else:
        cool_runtime = None
    if heating:
        heat_runtime = pd.Series(df["heat_runtime"].values, daily_index)
    else:
        heat_runtime = None

    # create thermostat instance
    thermostat = Thermostat(
        thermostat_id, equipment_type, zipcode, station, temp_in, temp_out,
        cooling_setpoint, heating_setpoint, cool_runtime, heat_runtime,
        auxiliary_heat_runtime, emergency_heat_runtime
    )
    return thermostat
def mock_isd_weather_source():
    """Return an ISD weather source caching to a throwaway sqlite database,
    with the network client replaced by a mock."""
    cache_url = "sqlite:///{}/weather_cache.db".format(tempfile.mkdtemp())
    source = ISDWeatherSource('722880', cache_url)
    source.client = MockWeatherClient()
    return source
def read_meter_data(trace_filename, project_info_filename, project_id=None,
                    weather=True, merge_series=True):
    """Read meter data from a raw XML file source, obtain matching project
    information from a separate CSV file. Fetches the corresponding weather
    data, when requested, too.

    Parameters
    ==========
    trace_filename: str
        Filename of XML meter trace.
    project_info_filename: str
        Filename of CSV file containing project info.
    project_id: str
        Manually provide the project ID used in `project_info_filename`.
        If `None`, the first part of `trace_filename` before a `_` is used.
    weather: bool
        `True` will obtain weather (temperature) data.
    merge_series: bool
        `True` will return a `pandas.DataFrame` with merged consumption
        and temperature data.

    Returns
    =======
    A `DataCollection` object with the following fields:

    project_info: `pandas.DataFrame`
        Contains columns for project properties.
    baseline_end: `pandas.Datetime`
        End date of the baseline period.
    consumption_data: `eemeter.consumption.ConsumptionData`
        Consumption data object.
    consumption_data_freq: `pandas.DataFrame`
        Consumption data with normalized frequency.

    If :samp:`weather=True`:

    weather_source: `eemeter.ISDWeatherSource`
        Weather source object.
    weather_data: `pandas.DataFrame`
        Temperature observations in, degF, with frequency matching
        `consumption_data_freq`. Values are averaged if raw temperature
        observations are lower frequency.

    If :samp:`merge_series=True`:

    cons_weather_data: `pandas.DataFrame`
        Merged consumption and temperature data.
    """
    from eemeter.meter import DataCollection, DataContainer
    from eemeter.parsers import ESPIUsageParser
    # TODO: New API.
    #from eemeter.structures import (
    #    EnergyTrace,
    #    EnergyTraceSet,
    #    Intervention,
    #    ZIPCodeSite,
    #    Project
    #)
    #from eemeter.io.parsers import ESPIUsageParser
    from eemeter.weather import ISDWeatherSource
    import pandas as pd
    import os

    with open(trace_filename, 'r') as f:
        parser = ESPIUsageParser(f.read())

    # Only the first consumption object in the trace is used.
    consumption_datas = list(parser.get_consumption_data_objects())
    cons_data_obj = consumption_datas[0]

    all_projects_info = pd.read_csv(project_info_filename)
    fuel_type_map = {'electricity': 'E', 'natural_gas': 'NG'}
    if project_id is None:
        project_id = os.path.basename(trace_filename).split("_")[0]
    fuel_type = fuel_type_map[cons_data_obj.fuel_type]
    project_info = all_projects_info.query(
        'project_id == "{}" and fuel_type == "{}"'.format(
            project_id, fuel_type))

    baseline_end = pd.to_datetime(
        project_info.baseline_period_end.tolist()[0], utc=True)

    # Sometimes the data have differing observation frequencies,
    # so choose the most common one (in the usage data) and align
    # everything to that.
    cons_index_diff = cons_data_obj.data.index.to_series().diff(periods=1)
    # BUG FIX: Series.argmax is deprecated (and later repurposed to return a
    # positional index); idxmax returns the *label* — here the most common
    # frequency — which is what resample() needs.
    new_freq = pd.value_counts(cons_index_diff).idxmax()
    cons_data = cons_data_obj.data.tz_convert("UTC")
    cons_data = cons_data.resample(new_freq).mean()

    res = DataCollection(project_info=project_info,
                         baseline_end=baseline_end,
                         consumption_data=cons_data_obj,
                         consumption_data_freq=cons_data)

    if weather:
        # BUG FIX: `unicode` does not exist on Python 3 (NameError); str()
        # is sufficient for the ASCII station identifiers used here.
        station = str(project_info.weather_station.tolist()[0])
        ws = ISDWeatherSource(station)
        res.add_data(DataContainer("weather_source", ws, None))
        ws.add_year_range(cons_data.index.min().year,
                          cons_data.index.max().year)
        weather_data = ws._unit_convert(ws.tempC, "degF").tz_localize("UTC")
        weather_data = weather_data.resample(new_freq).mean()
        res.add_data(DataContainer("weather_data", weather_data, None))
        if weather_data.empty:
            raise ValueError("No weather data")
        if merge_series:
            cons_weather_data = pd.concat([cons_data, weather_data],
                                          axis=1, join="inner")
            cons_weather_data.columns = ['usage', 'temp']
            res.add_data(
                DataContainer("cons_weather_data", cons_weather_data, None))
    return res