def dataset_with_operations(fillna_col0_col1, fillna_col1_col4) -> Dataset:
    """
    Build a Dataset whose ``operations_history`` attribute is not empty.

    The instance wraps ``DataFrameMock.df_generic(10)`` and both operations
    received as arguments are passed, in order, to ``track_history``.

    Returns
    -------
    Dataset
        Dataset instance on which ``track_history`` has been called with
        ``fillna_col0_col1`` and then with ``fillna_col1_col4``
    """
    populated = Dataset(df_object=DataFrameMock.df_generic(10))
    for operation in (fillna_col0_col1, fillna_col1_col4):
        populated.track_history(operation)
    return populated
def it_knows_how_to_track_history(
    self, request, metadata_cols, derived_columns, expected_metadata_cols
):
    """
    Check ``Dataset.track_history`` for a FillNA operation.

    After tracking the operation, ``metadata_cols`` must equal
    ``expected_metadata_cols`` and the mocked ``OperationsList.__iadd__``
    must have been called exactly once with the tracked operation.
    """
    iadd_ = method_mock(request, OperationsList, "__iadd__")
    mocked_frame = DataFrameMock.df_generic(10)
    # NOTE(review): presumably Dataset reads ``data_file`` through this
    # function, so patching it avoids touching the fake path -- confirm
    function_mock(
        request, "trousse.dataset.get_df_from_csv"
    ).return_value = mocked_frame
    dataset = Dataset(data_file="fake/path", metadata_cols=metadata_cols)
    fillna = fop.FillNA(
        columns=["metadata_num_col"],
        derived_columns=derived_columns,
        value=0,
    )

    dataset.track_history(fillna)

    assert dataset.metadata_cols == expected_metadata_cols
    iadd_.assert_called_once_with(ANY, fillna)
def test_track_history_with_no_derived_columns(
    self,
    request,
    metadata_columns,
    original_columns,
    expected_metadata_cols,
):
    """
    Check ``Dataset.track_history`` when the operation has no derived columns.

    A FillNA operation with ``derived_columns=None`` is tracked; it must then
    be found in ``operations_history`` for each of ``original_columns`` and
    ``metadata_cols`` must equal ``expected_metadata_cols``.
    """
    dataset = Dataset(
        df_object=DataFrameMock.df_generic(10),
        metadata_cols=metadata_columns,
    )
    operation = fop.FillNA(columns=original_columns, derived_columns=None, value=0)

    dataset.track_history(operation)

    # The operation has to be recorded under every column it was applied to
    assert all(
        operation in dataset.operations_history[name] for name in original_columns
    )
    assert dataset.metadata_cols == expected_metadata_cols