def test_iterate_segmented_dataset_with_processor(dataset, segmentation):
    """Check that a feature processor is applied to each yielded segment.

    The processor used here records the segment name it is called with,
    renames columns according to ``column_mapping`` and sets ``weight`` to 1,
    so the assertions can verify both the segment-name mapping and the
    processed output.
    """
    seen_segment_names = []

    def feature_processor(segment_name, dataset, column_mapping=None):
        # Remember the name we were called with, then rename some columns
        # and overwrite the weight column.
        seen_segment_names.append(segment_name)
        renamed = dataset.rename(columns=column_mapping)
        return renamed.assign(weight=1)

    column_mapping = {"a": "c", "b": "d"}
    name_mapping = {"jan": "jan2", "feb": "feb2"}
    segments = iterate_segmented_dataset(
        dataset,
        segmentation=segmentation,
        feature_processor=feature_processor,
        feature_processor_kwargs={"column_mapping": column_mapping},
        feature_processor_segment_name_mapping=name_mapping,
    )

    # After each step the processor must have been called with exactly the
    # mapped names of the segments produced so far, while the yielded name
    # stays the unmapped one.
    expected_steps = [
        ("jan", ["jan2"]),
        ("feb", ["jan2", "feb2"]),
    ]
    for expected_name, expected_calls in expected_steps:
        name, frame = next(segments)
        assert seen_segment_names == expected_calls
        assert name == expected_name
        assert list(frame.columns) == ["c", "d", "weight"]
        assert frame.shape == (1000, 3)
        assert frame.sum().sum() == 4000.0
def test_iterate_segmented_dataset_no_segmentation(dataset):
    """Without a segmentation, a single segment named ``None`` is yielded."""
    segments = iterate_segmented_dataset(dataset, segmentation=None)

    name, frame = next(segments)
    assert name is None
    assert list(frame.columns) == ["a", "b", "weight"]
    assert frame.shape == (1000, 3)
    assert frame.sum().sum() == 4000

    # The iterator must be exhausted after that single item.
    with pytest.raises(StopIteration):
        next(segments)
# Example #3
0
def create_caltrack_hourly_segmented_design_matrices(preliminary_design_matrix,
                                                     segmentation,
                                                     occupancy_lookup,
                                                     temperature_bins):
    """Build a dict of per-segment design matrices.

    Iterates ``preliminary_design_matrix`` with ``iterate_segmented_dataset``
    using ``segmentation``, processing every segment with
    ``caltrack_hourly_fit_feature_processor``. ``occupancy_lookup`` and
    ``temperature_bins`` are forwarded to the processor as keyword arguments.

    Returns
    -------
    dict
        Maps each yielded segment name to its processed segment data.
    """
    processor_kwargs = {
        "occupancy_lookup": occupancy_lookup,
        "temperature_bins": temperature_bins,
    }
    segments = iterate_segmented_dataset(
        preliminary_design_matrix,
        segmentation=segmentation,
        feature_processor=caltrack_hourly_fit_feature_processor,
        feature_processor_kwargs=processor_kwargs,
    )

    design_matrices = {}
    for name, matrix in segments:
        design_matrices[name] = matrix
    return design_matrices
# Example #4
0
def create_caltrack_hourly_segmented_design_matrices(
    preliminary_design_matrix,
    segmentation,
    occupancy_lookup,
    occupied_temperature_bins,
    unoccupied_temperature_bins,
):
    """Create design matrices for segmented CalTRACK hourly models.

    A helper which runs the basic feature creation step on every segment of
    the preliminary design matrix and collects the results by segment name.

    Parameters
    ----------
    preliminary_design_matrix : :any:`pandas.DataFrame`
        A dataframe of the form returned by
        :any:`eemeter.create_caltrack_hourly_preliminary_design_matrix`.
    segmentation : :any:`pandas.DataFrame`
        Weights for each segment. This is a dataframe of the form returned by
        :any:`eemeter.segment_time_series` on the `preliminary_design_matrix`.
    occupancy_lookup : :any:`pandas.DataFrame`
        Occupancy for each segment. This is a dataframe of the form returned
        by :any:`eemeter.estimate_hour_of_week_occupancy`.
    occupied_temperature_bins : :any:`pandas.DataFrame`
        Occupied temperature bin settings for each segment. This is a
        dataframe of the form returned by
        :any:`eemeter.fit_temperature_bins`.
    unoccupied_temperature_bins : :any:`pandas.DataFrame`
        Unoccupied temperature bin settings for each segment, in the same
        form as `occupied_temperature_bins`.

    Returns
    -------
    design_matrix : :any:`dict` of :any:`pandas.DataFrame`
        A dict of design matrices, keyed by segment name, created using the
        :any:`eemeter.caltrack_hourly_fit_feature_processor`.
    """
    processor_kwargs = {
        "occupancy_lookup": occupancy_lookup,
        "occupied_temperature_bins": occupied_temperature_bins,
        "unoccupied_temperature_bins": unoccupied_temperature_bins,
    }
    # The iterator yields (segment_name, segmented_data) pairs, which dict()
    # turns directly into the name -> data mapping.
    return dict(
        iterate_segmented_dataset(
            preliminary_design_matrix,
            segmentation=segmentation,
            feature_processor=caltrack_hourly_fit_feature_processor,
            feature_processor_kwargs=processor_kwargs,
        )
    )
def test_iterate_segmented_dataset_with_segmentation(dataset, segmentation):
    """Check name, columns, shape and total for the first three segments."""
    segments = iterate_segmented_dataset(dataset, segmentation=segmentation)

    # (segment name, expected shape, expected grand total), in yield order.
    expected_segments = [
        ("jan", (744, 3), 2976.0),
        ("feb", (256, 3), 1024.0),
        ("mar", (0, 3), 0.0),
    ]
    for expected_name, expected_shape, expected_total in expected_segments:
        name, frame = next(segments)
        assert name == expected_name
        assert list(frame.columns) == ["a", "b", "weight"]
        assert frame.shape == expected_shape
        assert frame.sum().sum() == expected_total