Exemplo n.º 1
0
def test_has_trace_id_and_interval():
    """trace_id and interval are None by default and stored when supplied."""
    common_kwargs = {
        'interpretation': 'ELECTRICITY_CONSUMPTION_SUPPLIED',
        'placeholder': True,
    }

    # No identifiers given: both attributes stay unset.
    anonymous = EnergyTrace(**common_kwargs)
    assert anonymous.trace_id is None
    assert anonymous.interval is None

    # Identifiers given: they are exposed unchanged.
    labelled = EnergyTrace(trace_id='ABC', interval='daily', **common_kwargs)
    assert labelled.trace_id == 'ABC'
    assert labelled.interval == 'daily'

    # The trace id also shows up in the string form of the trace.
    assert 'ABC' in str(labelled)
Exemplo n.º 2
0
def test_placeholder_valid(interpretation):
    """A placeholder trace keeps its interpretation but has no data or unit."""
    placeholder_trace = EnergyTrace(placeholder=True,
                                    interpretation=interpretation)

    assert placeholder_trace.interpretation == interpretation
    # Placeholders carry neither readings nor a unit.
    assert placeholder_trace.data is None
    assert placeholder_trace.unit is None
    assert placeholder_trace.placeholder
Exemplo n.º 3
0
def test_serializer(interpretation, records, unit, serializer):
    """Records run through a serializer end up in the trace's data."""
    trace_kwargs = {
        'interpretation': interpretation,
        'records': records,
        'unit': unit,
        'serializer': serializer,
    }
    built = EnergyTrace(**trace_kwargs)

    # First row mirrors the first input record and is not marked estimated.
    first_value = built.data.value.iloc[0]
    assert first_value == records[0]['value']
    first_estimated = built.data.estimated.iloc[0]
    assert not first_estimated
Exemplo n.º 4
0
 def eemeter_consumption_data(self):
     """Build an EnergyTrace from this object's related records.

     Each related record is converted with its ``eemeter_record()`` method;
     the stored interpretation and unit codes are translated through
     ``INTERPRETATION_CHOICES`` and ``UNIT_CHOICES`` respectively.
     """
     converted = []
     for stored_record in self.records.all():
         converted.append(stored_record.eemeter_record())

     interpretation_lookup = dict(INTERPRETATION_CHOICES)
     interpretation_name = interpretation_lookup[self.interpretation]

     unit_lookup = dict(UNIT_CHOICES)
     unit_label = unit_lookup[self.unit]

     start_serializer = ArbitraryStartSerializer()
     return EnergyTrace(interpretation_name,
                        records=converted,
                        unit=unit_label,
                        serializer=start_serializer)
Exemplo n.º 5
0
def trace2():
    """Return a one-row electricity trace whose value is NaN and estimated."""
    timestamp = datetime(2011, 1, 1, tzinfo=pytz.UTC)
    frame = pd.DataFrame(
        {"value": [np.nan], "estimated": [True]},
        index=[timestamp],
        columns=["value", "estimated"],
    )
    return EnergyTrace("ELECTRICITY_CONSUMPTION_SUPPLIED", frame, unit="KWH")
Exemplo n.º 6
0
def test_non_timeseries_data(interpretation, unit):
    """A frame built without an explicit datetime index must raise ValueError."""
    column_names = ["value", "estimated"]
    # No index argument: the frame gets pandas' default index.
    unindexed = pd.DataFrame(
        {"value": [1, np.nan], "estimated": [False, False]},
        columns=column_names,
    )

    with pytest.raises(ValueError):
        EnergyTrace(interpretation=interpretation, data=unindexed, unit=unit)
def trace():
    """Return a 365-row daily electricity trace of constant, measured values."""
    n_days = 365
    daily_index = pd.date_range('2000-01-01', periods=n_days, freq='D',
                                tz=pytz.UTC)
    frame = pd.DataFrame(
        {
            "value": np.tile(1, (n_days, )),
            "estimated": np.tile(False, (n_days, )),
        },
        index=daily_index,
        columns=["value", "estimated"],
    )
    return EnergyTrace("ELECTRICITY_CONSUMPTION_SUPPLIED", frame, unit="KWH")
Exemplo n.º 8
0
def trace1():
    """Return a two-day electricity trace whose second value is NaN."""
    two_days = pd.date_range('2000-01-01', periods=2, freq='D')
    frame = pd.DataFrame(
        {"value": [1, np.nan], "estimated": [False, False]},
        index=two_days,
        columns=["value", "estimated"],
    )

    return EnergyTrace("ELECTRICITY_CONSUMPTION_SUPPLIED", unit="KWH",
                       data=frame)
Exemplo n.º 9
0
def test_bad_column_name_data(interpretation, unit):
    """A frame with an "energy" column in place of "value" must raise ValueError."""
    two_days = pd.date_range('2000-01-01', periods=2, freq='D')
    # "energy" replaces the "value" column used by the other fixtures.
    misnamed = pd.DataFrame(
        {"energy": [1, np.nan], "estimated": [False, False]},
        index=two_days,
        columns=["energy", "estimated"],
    )

    with pytest.raises(ValueError):
        EnergyTrace(interpretation=interpretation, data=misnamed, unit=unit)
Exemplo n.º 10
0
def fifteen_min_trace():
    """Return a 9600-row electricity trace sampled every 15 minutes."""
    n_readings = 9600
    first_reading = datetime(2011, 1, 1, tzinfo=pytz.UTC)
    quarter_hourly = pd.date_range(start=first_reading,
                                   periods=n_readings,
                                   freq='15T',
                                   tz=pytz.UTC)
    frame = pd.DataFrame(
        {
            # Constant, non-estimated reading for every interval.
            "value": [1] * n_readings,
            "estimated": [False] * n_readings,
        },
        index=quarter_hourly,
        columns=["value", "estimated"],
    )
    return EnergyTrace("ELECTRICITY_CONSUMPTION_SUPPLIED", frame, unit="KWH")
Exemplo n.º 11
0
def trace():
    """Return a natural gas trace of twelve constant readings.

    The index is generated with ``freq='M'`` between 6/6/2012 and 6/6/2013
    in UTC.
    """
    n_readings = 12
    monthly_index = pd.date_range('6/6/2012', '6/6/2013', freq='M',
                                  tz=pytz.UTC)
    readings = {
        "value": [1] * n_readings,
        "estimated": [False] * n_readings,
    }
    frame = pd.DataFrame(readings, index=monthly_index,
                         columns=['value', 'estimated'])

    return EnergyTrace(interpretation="NATURAL_GAS_CONSUMPTION_SUPPLIED",
                       unit="THERM",
                       data=frame)
Exemplo n.º 12
0
def trace_set():
    """Return an EnergyTraceSet holding one five-day generation trace.

    The trace's last value is NaN; the set labels the trace "trace".
    """
    five_days = pd.date_range('2000-01-01', periods=5, freq='D')
    frame = pd.DataFrame(
        {
            "value": [1, 1, 1, 1, np.nan],
            "estimated": [False, False, False, False, False],
        },
        index=five_days,
        columns=["value", "estimated"],
    )

    generation_trace = EnergyTrace(
        "ELECTRICITY_ON_SITE_GENERATION_UNCONSUMED", data=frame, unit="KWH")

    return EnergyTraceSet([generation_trace], ["trace"])
Exemplo n.º 13
0
def build_trace(trace_records):
    """Build an EnergyTrace from a list of record dicts.

    The first record decides the fuel: an 'interpretation' of 'gas' yields a
    natural gas trace in "THM", anything else an electricity trace in "KWH".
    The first record's 'project_id' is used as the trace id.
    """
    first_record = trace_records[0]
    is_gas = first_record['interpretation'] == 'gas'
    unit, interpretation = (
        ("THM", "NATURAL_GAS_CONSUMPTION_SUPPLIED") if is_gas
        else ("KWH", "ELECTRICITY_CONSUMPTION_SUPPLIED")
    )
    return EnergyTrace(records=trace_records,
                       unit=unit,
                       interpretation=interpretation,
                       serializer=ArbitraryStartSerializer(),
                       trace_id=first_record['project_id'])
Exemplo n.º 14
0
def trace():
    """Return a four-reading natural gas trace whose last value is NaN.

    Readings are stamped on the 6th of June through September 2012 (UTC).
    """
    stamps = ["2012-06-06", "2012-07-06", "2012-08-06", "2012-09-06"]
    utc_index = pd.DatetimeIndex(stamps, dtype='datetime64[ns, UTC]',
                                 freq=None)

    readings = {
        "value": [1, 1, 1, np.nan],
        "estimated": [False, False, False, False],
    }
    frame = pd.DataFrame(readings, index=utc_index,
                         columns=['value', 'estimated'])

    return EnergyTrace(interpretation="NATURAL_GAS_CONSUMPTION_SUPPLIED",
                       unit="THERM",
                       data=frame)
Exemplo n.º 15
0
def test_data_and_valid_unit(
        interpretation, unnormalized_unit_with_target_unit, unit_timeseries):
    """Data given in any valid unit is stored scaled to the normalized unit."""
    (unnormalized_unit,
     normalized_unit,
     mult) = unnormalized_unit_with_target_unit

    et = EnergyTrace(interpretation=interpretation,
                     data=unit_timeseries,
                     unit=unnormalized_unit)

    assert et.interpretation == interpretation
    assert et.unit == normalized_unit

    # The stored value equals the input scaled by the unit multiplier.
    stored_first = et.data.value.iloc[0]
    expected_first = (unit_timeseries.value * mult).iloc[0]
    np.testing.assert_allclose(stored_first, expected_first,
                               rtol=1e-3, atol=1e-3)

    assert not et.data.estimated.iloc[0]
    assert not et.placeholder
Exemplo n.º 16
0
    def get_energy_traces(self, service_kind_default="electricity"):
        ''' Retrieve all energy trace records stored as IntervalReading
        elements in the given ESPI Energy Usage XML.

        Energy records are grouped by interpretation and returned in
        EnergyTrace objects.

        Parameters
        ----------
        service_kind_default : str
            Default fuel type to use in parser if ReadingType/commodity field
            is missing.

        Yields
        ------
        energy_trace : eemeter.structures.EnergyTrace
            Energy data traces as described in the xml file.

        Raises
        ------
        KeyError
            If a (fuel type, flow direction) pair has no entry in the
            interpretation mapping below.
        '''

        INTERPRETATION_MAPPING = {
            ("electricity", "forward"): "ELECTRICITY_CONSUMPTION_SUPPLIED",
            ("natural_gas", "forward"): "NATURAL_GAS_CONSUMPTION_SUPPLIED",
            ("electricity", "reverse"):
            "ELECTRICITY_ON_SITE_GENERATION_UNCONSUMED",
            # This key was previously misspelled ("electriicty"), which made
            # net-flow electricity records fail with KeyError.
            ("electricity", "net"): "ELECTRICITY_CONSUMPTION_NET",
        }

        # Get all consumption records, group by fuel type.
        for flow_direction, records in self._get_consumption_record_groups():

            if len(records) > 0:
                fuel_type_records = defaultdict(list)
                for record in records:
                    fuel_type_records[record["fuel_type"]].append(record)

                # Wrap records in EnergyTrace objects, by fuel type.
                # A distinct loop name avoids shadowing the outer `records`.
                for fuel_type, fuel_records in fuel_type_records.items():
                    if fuel_type is None:
                        fuel_type = service_kind_default
                    selector = (fuel_type, flow_direction)
                    interpretation = INTERPRETATION_MAPPING[selector]
                    # The unit is taken from the first record of the group.
                    yield EnergyTrace(interpretation,
                                      records=fuel_records,
                                      unit=fuel_records[0]["unit_name"],
                                      serializer=ArbitrarySerializer())
Exemplo n.º 17
0
def billing_trace():
    """Return an 18-row electricity trace with irregular billing-style dates.

    The first five rows are hand-picked dates in early 2011 (one estimated,
    one NaN); the remaining thirteen are spaced 30 days apart starting
    2011-06-01.
    """
    n_regular = 13
    utc = pytz.UTC

    irregular_dates = [
        datetime(2011, 1, 1, tzinfo=utc),
        datetime(2011, 2, 1, tzinfo=utc),
        datetime(2011, 3, 2, tzinfo=utc),
        datetime(2011, 4, 3, tzinfo=utc),
        datetime(2011, 4, 29, tzinfo=utc),
    ]
    regular_dates = [
        datetime(2011, 6, 1, tzinfo=utc) + timedelta(days=30 * step)
        for step in range(n_regular)
    ]

    values = [1, 1, 1, 1, np.nan] + [1] * n_regular
    estimated_flags = [False, False, True, False, False] + [False] * n_regular

    frame = pd.DataFrame(
        {"value": values, "estimated": estimated_flags},
        index=irregular_dates + regular_dates,
        columns=["value", "estimated"],
    )
    return EnergyTrace("ELECTRICITY_CONSUMPTION_SUPPLIED", frame, unit="KWH")
Exemplo n.º 18
0
def _deserialize_single_trace(trace):
    """Turn one serialized trace mapping into an EnergyTrace.

    Returns ``{"trace": EnergyTrace}`` on success, or ``{"error": message}``
    when "type" or a required key is missing or the type is not recognized.
    Required keys are "type", "interpretation", "unit" and "records";
    "trace_id" and "interval" are optional and default to None.
    """
    type_ = trace.get('type', None)
    if type_ is None:
        return {'error': 'Serialization "type" not given for trace.'}

    # Required keys, validated in this order; the first missing one is
    # reported.
    required = {}
    for key in ('interpretation', 'unit', 'records'):
        value = trace.get(key, None)
        if value is None:
            return {
                'error':
                'Trace serializations must provide key "{}".'.format(key)
            }
        required[key] = value

    # Optional identifiers.
    trace_id = trace.get('trace_id', None)
    interval = trace.get('interval', None)

    # Pick the serializer class matching the declared type.
    if type_ == 'ARBITRARY':
        serializer_class = ArbitrarySerializer
    elif type_ == 'ARBITRARY_START':
        serializer_class = ArbitraryStartSerializer
    elif type_ == 'ARBITRARY_END':
        serializer_class = ArbitraryEndSerializer
    else:
        return {
            'error':
            'Serialization type "{}" not recognized for trace.'.format(type_)
        }

    energy_trace = EnergyTrace(
        interpretation=required['interpretation'],
        unit=required['unit'],
        records=required['records'],
        serializer=serializer_class(parse_dates=True),
        trace_id=trace_id,
        interval=interval,
    )
    return {"trace": energy_trace}
Exemplo n.º 19
0
def test_no_data_no_placeholder(interpretation):
    """A trace given neither data nor placeholder=True must raise ValueError."""
    only_interpretation = {'interpretation': interpretation}
    with pytest.raises(ValueError):
        EnergyTrace(**only_interpretation)
Exemplo n.º 20
0
def test_repr(interpretation):
    """The string form of a trace mentions the class name."""
    placeholder_trace = EnergyTrace(placeholder=True,
                                    interpretation=interpretation)
    rendered = str(placeholder_trace)
    assert 'EnergyTrace' in rendered
Exemplo n.º 21
0
def placeholder_trace_set():
    """Return an EnergyTraceSet holding one placeholder generation trace.

    The set labels the trace "trace".
    """
    members = [
        EnergyTrace("ELECTRICITY_ON_SITE_GENERATION_UNCONSUMED",
                    placeholder=True),
    ]
    labels = ["trace"]
    return EnergyTraceSet(members, labels)
Exemplo n.º 22
0
def test_data_and_placeholder(interpretation):
    """Passing data together with placeholder=True must raise ValueError."""
    with pytest.raises(ValueError):
        conflicting_kwargs = {
            'interpretation': interpretation,
            'data': pd.DataFrame(),
            'placeholder': True,
        }
        EnergyTrace(**conflicting_kwargs)
Exemplo n.º 23
0
def test_invalid_interpretation():
    """The interpretation string "INVALID" must raise ValueError."""
    bad_kwargs = {'interpretation': "INVALID", 'placeholder': True}
    with pytest.raises(ValueError):
        EnergyTrace(**bad_kwargs)
def energy_trace_set(daily_data):
    """Wrap ``daily_data`` in an EnergyTraceSet holding one electricity trace."""
    electricity_trace = EnergyTrace('ELECTRICITY_CONSUMPTION_SUPPLIED',
                                    unit='kWh',
                                    data=daily_data)
    return EnergyTraceSet([electricity_trace])
Exemplo n.º 25
0
    def fit(self, weather_source):
        ''' Fit all models associated with this trace.

        For every modeling period the trace data is filtered to that period,
        formatted into model input, and passed to the model mapped to the
        period's label. Failures in formatting or fitting are logged and
        recorded in the outputs instead of being raised.

        Parameters
        ----------
        weather_source : eemeter.weather.ISDWeatherSource
            Weather source to use in creating covariate data.

        Returns
        -------
        fit_outputs : dict
            ``self.fit_outputs``, keyed by modeling period label. Each entry
            has a "status" of "SUCCESS" or "FAILURE"; failures also carry a
            formatted "traceback".
        '''

        for modeling_period_label, modeling_period in \
                self.modeling_period_set.iter_modeling_periods():

            filtered_data = self._filter_by_modeling_period(
                self.trace, modeling_period)
            filtered_trace = EnergyTrace(
                self.trace.interpretation, data=filtered_data,
                unit=self.trace.unit)

            model = self.model_mapping[modeling_period_label]

            try:
                input_data = self.formatter.create_input(
                    filtered_trace, weather_source)
            except Exception:
                # `except Exception` (not a bare except) lets
                # KeyboardInterrupt / SystemExit propagate.
                logger.warning(
                    'For trace "{}" and modeling_period "{}", was not'
                    ' able to format input data for {}.'
                    .format(self.trace.interpretation, modeling_period_label,
                            model)
                )
                self.fit_outputs[modeling_period_label] = {
                    "status": "FAILURE",
                    "traceback": traceback.format_exc(),
                    "start_date": None,
                    "end_date": None,
                    # Same key the success path uses for the row count.
                    "n_rows": None,
                    # Legacy key kept for callers that already read it.
                    "rows": None,
                }
                continue

            input_description = self.formatter.describe_input(input_data)
            outputs = {
                "start_date": input_description.get('start_date'),
                "end_date": input_description.get('end_date'),
                "n_rows": input_description.get('n_rows'),
            }

            try:
                outputs.update(model.fit(input_data))
            except Exception:
                logger.warning(
                    'For trace "{}" and modeling_period "{}", {} was not'
                    ' able to fit using input data: {}'
                    .format(self.trace.interpretation, modeling_period_label,
                            model, input_data)
                )

                outputs.update({
                    "status": "FAILURE",
                    "traceback": traceback.format_exc(),
                })
            else:
                logger.info(
                    'Successfully fitted {} to formatted input data for'
                    ' trace "{}" and modeling_period "{}".'
                    .format(model, self.trace.interpretation,
                            modeling_period_label)
                )
                outputs.update({"status": "SUCCESS"})

            self.fit_outputs[modeling_period_label] = outputs

        return self.fit_outputs
Exemplo n.º 26
0
    def fit(self, weather_source):
        ''' Fit all models associated with this trace.

        For every modeling period the trace data is filtered to that period,
        formatted into model input, and passed to the model mapped to the
        period's label. A missing weather source, a formatting failure or a
        fit failure is logged and recorded in the outputs instead of being
        raised.

        Parameters
        ----------
        weather_source : eemeter.weather.ISDWeatherSource
            Weather source to use in creating covariate data. If None, every
            modeling period is recorded as a failure.

        Returns
        -------
        fit_outputs : dict
            ``self.fit_outputs``, keyed by modeling period label. Each entry
            has a "status" of "SUCCESS" or "FAILURE", a "traceback" (None
            unless a step failed) and a "model_fit" dict that is filled
            only when fitting succeeds.
        '''

        for modeling_period_label, modeling_period in \
                self.modeling_period_set.iter_modeling_periods():

            filtered_data = self._filter_by_modeling_period(
                self.trace, modeling_period)
            filtered_trace = EnergyTrace(self.trace.interpretation,
                                         data=filtered_data,
                                         unit=self.trace.unit)

            model = self.model_mapping[modeling_period_label]

            # Defaults; "input_data" is never overwritten in this method.
            outputs = {
                "status": None,
                "traceback": None,
                "input_data": None,
                "start_date": None,
                "end_date": None,
                "n_rows": None,
                "model_fit": {},
            }

            # fail with DataSufficiencyException if bad weather source
            if weather_source is None:
                message = (
                    'No weather source found for trace {} in {} period'.format(
                        self.trace.trace_id, modeling_period_label))
                logger.warning(message)
                # Raise and immediately catch so format_exc() produces a
                # traceback string like the other failure paths.
                try:
                    raise model_exceptions.DataSufficiencyException(message)
                except Exception:
                    outputs.update({
                        "status": "FAILURE",
                        "traceback": traceback.format_exc(),
                    })
                    self.fit_outputs[modeling_period_label] = outputs
                continue

            # attempt to create input data
            try:
                input_data = self.formatter.create_input(
                    filtered_trace, weather_source)
            except Exception:
                # `except Exception` (not a bare except) lets
                # KeyboardInterrupt / SystemExit propagate.
                logger.warning(
                    'Input data formatting failed for trace {} in {} period.'.
                    format(self.trace.trace_id, modeling_period_label))
                outputs.update({
                    "status": "FAILURE",
                    "traceback": traceback.format_exc(),
                })
            else:
                input_description = self.formatter.describe_input(input_data)
                input_serialization = self.formatter.serialize_input(
                    input_data)
                input_mask = self.formatter.get_input_data_mask(input_data)
                outputs.update({
                    "input_data_serialization":
                    input_serialization,
                    "input_mask":
                    input_mask,  # missing days
                    "start_date":
                    input_description.get('start_date'),
                    "end_date":
                    input_description.get('end_date'),
                    "n_rows":
                    input_description.get('n_rows'),
                    "trace":
                    filtered_trace,
                })

                try:
                    model_fit = model.fit(input_data)
                except Exception:
                    # Capture the traceback before logging.
                    tb = traceback.format_exc()
                    logger.warning(
                        '{} fit failed for trace {} in {} period.'.format(
                            model, self.trace.trace_id, modeling_period_label))

                    outputs.update({
                        "status": "FAILURE",
                        "traceback": tb,
                    })
                else:
                    logger.debug(
                        '{} fit successful for trace {} in {} period.'.format(
                            model, self.trace.trace_id, modeling_period_label))
                    outputs["model_fit"].update(model_fit)
                    outputs.update({
                        "status": "SUCCESS",
                    })

            self.fit_outputs[modeling_period_label] = outputs

        return self.fit_outputs
Exemplo n.º 27
0
def test_valid_interpretation(valid_interpretation):
    """A valid interpretation string is stored unchanged on the trace."""
    placeholder_trace = EnergyTrace(placeholder=True,
                                    interpretation=valid_interpretation)

    stored = placeholder_trace.interpretation
    assert stored == valid_interpretation
Exemplo n.º 28
0
def hourly_trace():
    """Return a three-hour UTC electricity trace whose last value is NaN."""
    three_hours = pd.date_range('2000-01-01', periods=3, freq='H',
                                tz=pytz.UTC)
    frame = pd.DataFrame(
        {"value": [1, 1, np.nan], "estimated": [False, False, False]},
        index=three_hours,
        columns=["value", "estimated"],
    )
    return EnergyTrace("ELECTRICITY_CONSUMPTION_SUPPLIED", frame, unit="KWH")
Exemplo n.º 29
0
def test_data_but_no_unit(interpretation):
    """Supplying data without a unit must raise ValueError."""
    with pytest.raises(ValueError):
        unitless_kwargs = {
            'interpretation': interpretation,
            'data': pd.DataFrame(),
        }
        EnergyTrace(**unitless_kwargs)
Exemplo n.º 30
0
def test_data_but_invalid_unit(interpretation):
    """Supplying data with the unit "INVALID" must raise ValueError."""
    with pytest.raises(ValueError):
        bad_unit_kwargs = {
            'interpretation': interpretation,
            'data': pd.DataFrame(),
            'unit': "INVALID",
        }
        EnergyTrace(**bad_unit_kwargs)