def test_iterate_segmented_dataset_with_processor(dataset, segmentation):
    """Check that the feature processor runs once per segment, lazily.

    The processor records the segment name it is handed, so the test can
    verify both the name mapping ("jan" -> "jan2", "feb" -> "feb2") and that
    the processor is only invoked as each segment is pulled from the iterator.
    """
    seen_segment_names = []

    # Parameter names are kept as-is: the iterator under test may pass them
    # by keyword.
    def record_and_rename(segment_name, dataset, column_mapping=None):
        seen_segment_names.append(segment_name)
        renamed = dataset.rename(columns=column_mapping)
        return renamed.assign(weight=1)

    iterator = iterate_segmented_dataset(
        dataset,
        segmentation=segmentation,
        feature_processor=record_and_rename,
        feature_processor_kwargs={"column_mapping": {"a": "c", "b": "d"}},
        feature_processor_segment_name_mapping={"jan": "jan2", "feb": "feb2"},
    )

    # (segment name yielded, processor calls recorded so far)
    expectations = [
        ("jan", ["jan2"]),
        ("feb", ["jan2", "feb2"]),
    ]
    for expected_segment, expected_calls in expectations:
        segment_name, data = next(iterator)
        assert seen_segment_names == expected_calls
        assert segment_name == expected_segment
        assert list(data.columns) == ["c", "d", "weight"]
        assert data.shape == (1000, 3)
        assert data.sum().sum() == 4000.0
def test_iterate_segmented_dataset_no_segmentation(dataset):
    """Without a segmentation, exactly one unnamed segment is yielded."""
    segments = iterate_segmented_dataset(dataset, segmentation=None)

    first = next(segments)
    segment_name, data = first

    # The single segment carries no name and gains a "weight" column.
    assert segment_name is None
    assert list(data.columns) == ["a", "b", "weight"]
    assert data.shape == (1000, 3)
    grand_total = data.sum().sum()
    assert grand_total == 4000

    # Nothing else should follow the single segment.
    with pytest.raises(StopIteration):
        next(segments)
def create_caltrack_hourly_segmented_design_matrices(
    preliminary_design_matrix, segmentation, occupancy_lookup, temperature_bins
):
    """Build one design matrix per segment for CalTRACK hourly models.

    Iterates the segments of ``preliminary_design_matrix`` with
    ``iterate_segmented_dataset``, applying
    ``caltrack_hourly_fit_feature_processor`` to each one.

    Parameters
    ----------
    preliminary_design_matrix
        Dataset to be segmented; handed to ``iterate_segmented_dataset``.
    segmentation
        Segmentation handed to ``iterate_segmented_dataset``.
    occupancy_lookup
        Forwarded to the feature processor as ``occupancy_lookup``.
    temperature_bins
        Forwarded to the feature processor as ``temperature_bins``.

    Returns
    -------
    design_matrices : dict
        Mapping of segment name to the processed data for that segment.
    """
    # NOTE(review): this file also contains a later definition with the same
    # name (taking occupied/unoccupied temperature bins); if both live in the
    # same module the later one shadows this one -- confirm which is intended.
    processor_kwargs = {
        "occupancy_lookup": occupancy_lookup,
        "temperature_bins": temperature_bins,
    }
    segments = iterate_segmented_dataset(
        preliminary_design_matrix,
        segmentation=segmentation,
        feature_processor=caltrack_hourly_fit_feature_processor,
        feature_processor_kwargs=processor_kwargs,
    )
    # Each yielded item is a (segment_name, segmented_data) pair.
    return dict(segments)
def create_caltrack_hourly_segmented_design_matrices(
    preliminary_design_matrix,
    segmentation,
    occupancy_lookup,
    occupied_temperature_bins,
    unoccupied_temperature_bins,
):
    """Create design matrices for segmented CalTRACK hourly models.

    A helper which runs the basic feature creation methods on every segment
    of the preliminary design matrix and collects the results by segment.

    Parameters
    ----------
    preliminary_design_matrix : :any:`pandas.DataFrame`
        A dataframe of the form returned by
        :any:`eemeter.create_caltrack_hourly_preliminary_design_matrix`.
    segmentation : :any:`pandas.DataFrame`
        Weights for each segment. This is a dataframe of the form returned by
        :any:`eemeter.segment_time_series` on the
        ``preliminary_design_matrix``.
    occupancy_lookup : :any:`pandas.DataFrame`
        Occupancy for each segment. This is a dataframe of the form returned
        by :any:`eemeter.estimate_hour_of_week_occupancy`.
    occupied_temperature_bins : :any:`pandas.DataFrame`
        Occupied temperature bin settings for each segment. This is a
        dataframe of the form returned by
        :any:`eemeter.fit_temperature_bins`.
    unoccupied_temperature_bins : :any:`pandas.DataFrame`
        Unoccupied temperature bin settings for each segment, in the same
        form as ``occupied_temperature_bins``.

    Returns
    -------
    design_matrix : :any:`dict` of :any:`pandas.DataFrame`
        A dict of design matrices, keyed by segment name, created using the
        :any:`eemeter.caltrack_hourly_fit_feature_processor`.
    """
    processor_kwargs = {
        "occupancy_lookup": occupancy_lookup,
        "occupied_temperature_bins": occupied_temperature_bins,
        "unoccupied_temperature_bins": unoccupied_temperature_bins,
    }
    segments = iterate_segmented_dataset(
        preliminary_design_matrix,
        segmentation=segmentation,
        feature_processor=caltrack_hourly_fit_feature_processor,
        feature_processor_kwargs=processor_kwargs,
    )
    # Each yielded item is a (segment_name, segmented_data) pair.
    return dict(segments)
def test_iterate_segmented_dataset_with_segmentation(dataset, segmentation):
    """Each segment is yielded in order with the expected rows and totals."""
    iterator = iterate_segmented_dataset(dataset, segmentation=segmentation)

    # (segment name, expected row count, expected grand total of all cells)
    expectations = [
        ("jan", 744, 2976.0),
        ("feb", 256, 1024.0),
        ("mar", 0, 0.0),
    ]
    for expected_name, expected_rows, expected_total in expectations:
        segment_name, data = next(iterator)
        assert segment_name == expected_name
        assert list(data.columns) == ["a", "b", "weight"]
        assert data.shape == (expected_rows, 3)
        assert data.sum().sum() == expected_total