def test_pipeline_stage_to_pipeline_addition():
    """Check that ``stage + pipeline`` produces a two-stage pipeline.

    The stage is the left-hand operand of the addition. The combined
    pipeline is then applied to the test frame, and the result must lack
    'num1' and 'num2' while still containing 'char'.
    """
    first_stage = SilentDropStage('num1')
    second_stage = SilentDropStage('num2')

    single = PdPipeline([first_stage])
    assert len(single) == 1

    # Stage on the left, pipeline on the right.
    combined = second_stage + single
    assert len(combined) == 2

    result = combined.apply(_test_df(), verbose=True)
    for dropped in ('num1', 'num2'):
        assert dropped not in result.columns
    assert 'char' in result.columns
def test_pipeline_slice():
    """Check that slicing a pipeline keeps only the selected stages.

    A three-stage pipeline is sliced with ``[0:2]``. The sliced pipeline
    must report two stages, and applying it to the test frame must remove
    'num1' and 'num2' but leave 'char' in place.
    """
    stages = [
        SilentDropStage('num1'),
        SilentDropStage('num2'),
        SilentDropStage('char'),
    ]
    full = PdPipeline(stages)
    assert len(full) == 3

    head = full[0:2]
    assert len(head) == 2

    result = head.apply(_test_df(), verbose=True)
    for dropped in ('num1', 'num2'):
        assert dropped not in result.columns
    assert 'char' in result.columns
def test_pipeline_slice_by_name():
    """Check selecting pipeline stages by their names.

    Indexing with a list of names must yield a pipeline of the matching
    stages, indexing with a single name must yield that stage, and
    indexing with a name that is not in the pipeline must raise
    ``ValueError`` with the expected message.
    """
    drop_num1 = SilentDropStage('num1', name='dropNum1')
    drop_num2 = SilentDropStage('num2', name='dropNum2')
    drop_char = SilentDropStage('char', name='dropChar')
    pipeline = PdPipeline([drop_num1, drop_num2, drop_char])
    assert len(pipeline) == 3

    pipeline = pipeline[['dropNum1', 'dropNum2']]
    assert len(pipeline) == 2
    assert pipeline['dropNum1'] == drop_num1

    with pytest.raises(ValueError) as e:
        pipeline['dropChar']
    # Checked after the ``with`` block on purpose: a statement placed after
    # the raising lookup inside the block would never be executed.
    assert str(e.value) == "'dropChar' is not exist."

    df = _test_df()
    res_df = pipeline.apply(df, verbose=True)
    assert 'num1' not in res_df.columns
    assert 'num2' not in res_df.columns
    assert 'char' in res_df.columns
def test_pipeline_index():
    """Check integer indexing into a pipeline.

    Each position must return the stage that was supplied at that
    position, and calling the returned stage on the test frame must yield
    a frame without that stage's column.
    """
    df = _test_df()
    columns = ('num1', 'num2', 'char')
    stages = [SilentDropStage(column) for column in columns]
    pipeline = PdPipeline(stages)
    assert len(pipeline) == 3

    for position, (stage, column) in enumerate(zip(stages, columns)):
        assert pipeline[position] == stage
        assert column not in pipeline[position](df).columns
def test_pipeline_error(time):
    """Test exceptions at pipeline level.

    ``time`` is supplied by pytest (its fixture or parametrization is
    defined outside this function) and is forwarded unchanged to each
    pipeline call. ``fit``, ``transform`` and ``fit_transform`` are each
    expected to raise ``PipelineApplicationError``.
    """
    def _num1_equals_num3(df):
        return df['num1'] == df['num3']

    pipeline = PdPipeline(
        [ColByFrameFunc("Equality", _num1_equals_num3), ColDrop("B")]
    )

    # Same order as before: fit, then transform, then fit_transform, each
    # on a freshly built test frame.
    for method_name in ('fit', 'transform', 'fit_transform'):
        frame = _test_df()
        method = getattr(pipeline, method_name)
        with pytest.raises(PipelineApplicationError):
            method(frame, verbose=True, time=time)
def test_two_stage_pipeline_stage():
    """Exercise a two-stage pipeline through its main entry points.

    Covers ``apply``, ``str``, ``fit``, ``transform``, ``fit_transform``
    and ``get_transformer`` with a custom ``transformer_getter``.
    """
    drop_num1 = SilentDropStage('num1')
    drop_num2 = SilentDropStage('num2')
    pipeline = PdPipeline([drop_num1, drop_num2])
    assert len(pipeline) == 2

    df = _test_df()
    res_df = pipeline.apply(df, verbose=True)
    assert 'num1' not in res_df.columns
    assert 'num2' not in res_df.columns
    assert 'char' in res_df.columns

    # Smoke check: building the string representation must not raise.
    str(pipeline)

    # test fit: the returned frame is expected to keep every column
    df = _test_df()
    res_df = pipeline.fit(df, verbose=True)
    for x in ['num1', 'num2', 'char']:
        assert x in res_df.columns

    # test transform
    df = _test_df()
    res_df = pipeline.transform(df, verbose=True)
    assert 'num1' not in res_df.columns
    assert 'num2' not in res_df.columns
    assert 'char' in res_df.columns

    # test fit_transform: only checks that the call completes
    df = _test_df()
    res_df = pipeline.fit_transform(df, verbose=True)

    # test get_transformer
    def _first_stage_only(pline):
        # Slice the pipeline received as argument. The previous lambda
        # misspelled its parameter and silently closed over the outer
        # ``pipeline`` variable instead of using what it was given.
        return pline[:1]

    pipeline = PdPipeline(
        [drop_num1, drop_num2], transformer_getter=_first_stage_only
    )
    transformer = pipeline.get_transformer()
    res_df = transformer(df, verbose=True)
    assert 'num1' not in res_df.columns
    assert 'num2' in res_df.columns
    assert 'char' in res_df.columns
def test_pipeline_to_int_addition():
    """Check that adding an integer to a pipeline raises ``TypeError``."""
    pipeline = PdPipeline([SilentDropStage('num1')])
    with pytest.raises(TypeError):
        # The addition itself must raise, so no result is ever bound.
        # An assertion on that result could never run and was removed.
        _ = pipeline + 43