示例#1
0
def dataset_with_operations(fillna_col0_col1, fillna_col1_col4) -> Dataset:
    """
    Build a Dataset whose ``operations_history`` attribute is not empty.

    Parameters
    ----------
    fillna_col0_col1
        First operation recorded on the dataset through ``track_history``.
    fillna_col1_col4
        Second operation recorded on the dataset through ``track_history``.

    Returns
    -------
    Dataset
        Dataset instance built from ``DataFrameMock.df_generic(10)`` on which
        both operations have been tracked, in the order listed above.
    """
    tracked_dataset = Dataset(df_object=DataFrameMock.df_generic(10))

    # Register the operations one at a time, preserving their order.
    for operation in (fillna_col0_col1, fillna_col1_col4):
        tracked_dataset.track_history(operation)

    return tracked_dataset
示例#2
0
    def it_knows_how_to_track_history(
        self, request, metadata_cols, derived_columns, expected_metadata_cols
    ):
        """
        Check ``Dataset.track_history`` with ``OperationsList.__iadd__`` mocked.

        ``trousse.dataset.get_df_from_csv`` is replaced by a mock returning a
        generic DataFrame, presumably so that ``fake/path`` is never read.
        After tracking a ``FillNA`` operation, the test expects
        ``metadata_cols`` to equal ``expected_metadata_cols`` and the mocked
        ``__iadd__`` to have been called exactly once with the operation.
        """
        # Both collaborators are patched before the Dataset is constructed.
        iadd_mock_ = method_mock(request, OperationsList, "__iadd__")

        mocked_frame = DataFrameMock.df_generic(10)
        csv_loader_ = function_mock(request, "trousse.dataset.get_df_from_csv")
        csv_loader_.return_value = mocked_frame

        dataset = Dataset(data_file="fake/path", metadata_cols=metadata_cols)
        fill_operation = fop.FillNA(
            columns=["metadata_num_col"],
            derived_columns=derived_columns,
            value=0,
        )

        dataset.track_history(fill_operation)

        assert dataset.metadata_cols == expected_metadata_cols
        # ANY stands in for the first positional argument of the mocked call.
        iadd_mock_.assert_called_once_with(ANY, fill_operation)
示例#3
0
    def test_track_history_with_no_derived_columns(
        self,
        request,
        metadata_columns,
        original_columns,
        expected_metadata_cols,
    ):
        """
        Check ``track_history`` for an operation with ``derived_columns=None``.

        A ``FillNA`` operation on ``original_columns`` is tracked on a Dataset
        built from a generic DataFrame. The operation must then be found in
        ``operations_history`` under every original column, and
        ``metadata_cols`` must equal ``expected_metadata_cols``.
        """
        dataset = Dataset(
            df_object=DataFrameMock.df_generic(10),
            metadata_cols=metadata_columns,
        )
        fill_operation = fop.FillNA(
            columns=original_columns, derived_columns=None, value=0
        )

        dataset.track_history(fill_operation)

        # The operation must be recorded under each column it was applied to.
        for column_name in original_columns:
            assert fill_operation in dataset.operations_history[column_name]
        assert dataset.metadata_cols == expected_metadata_cols