Exemplo n.º 1
0
    def test_multi_exp_single_flows_single_outputload(self):
        """Running a single named flow and loading its output returns that
        flow's trained model (a LogisticRegression for experiment1)."""
        flow2 = d6tflow.WorkflowMulti({'experiment1': {'do_preprocess': False}, 'experiment2': {'do_preprocess': True}},
                                  task=TaskTrain)
        flow2.run(flow = "experiment1")

        out = flow2.outputLoad(TaskTrain)
        # bug fix: the comparison result was discarded (no assert), so the
        # test could never fail; assert makes the check effective
        assert type(out).__name__ == "LogisticRegression"
Exemplo n.º 2
0
    def test_output_load_comparing_with_task_run(self):
        """Per-flow outputLoad agrees with loading directly from a task
        instantiated with the same parameters."""
        flow_params = {1: {'param1': 1}, 2: {'param1': 2}}
        flow2 = d6tflow.WorkflowMulti(flow_params, task=Task1)

        flow2.run()
        # each flow's workflow-level load must match the task-level load
        for flow_id, task_params in flow_params.items():
            assert flow2.outputLoad(flow=flow_id) == Task1(**task_params).outputLoad()
Exemplo n.º 3
0
    def test_multi_exp_all_flows_outputloadall(self):
        """Running all flows and loading every output yields one trained
        LogisticRegression per experiment."""
        flow2 = d6tflow.WorkflowMulti({'experiment1': {'do_preprocess': False}, 'experiment2': {'do_preprocess': True}},
                                  task=TaskTrain)
        flow2.run()

        data = flow2.outputLoadAll()
        # bug fix: both comparisons discarded their result (no assert), so
        # the test could never fail; assert makes the checks effective
        assert type(data['experiment1']['TaskTrain']).__name__ == "LogisticRegression"
        assert type(data['experiment2']['TaskTrain']).__name__ == "LogisticRegression"
Exemplo n.º 4
0
    def test_output_load_all_comparing_with_task_run(self):
        """outputLoadAll results agree with each task's own outputLoad."""
        flow_params = {1: {'param1': 1}, 2: {'param1': 2}}
        flow2 = d6tflow.WorkflowMulti(flow_params, task=Task1)

        flow2.run()
        all_outputs = flow2.outputLoadAll()
        # compare the bulk-loaded output of every flow against a direct load
        for flow_id, task_params in flow_params.items():
            assert all_outputs[flow_id]['Task1'] == Task1(**task_params).outputLoad()
Exemplo n.º 5
0
 def test_multi_get_task(self):
     """get_task(flow=...) returns the task parameterized for that flow."""
     flow_params = {1: {'param1': 1}, 2: {'param1': 2}}
     flow2 = d6tflow.WorkflowMulti(params=flow_params, task=Task1)
     # each flow id maps to a task carrying that flow's param1 value
     for flow_id in (1, 2):
         selected = flow2.get_task(flow=flow_id)
         assert selected.param_kwargs['param1'] == flow_id
Exemplo n.º 6
0
 def test_no_default_expect_error(self):
     """Running a multi-flow workflow with no task set raises RuntimeError."""
     experiment_params = {
         'experiment1': {'do_preprocess': False},
         'experiment2': {'do_preprocess': True},
     }
     with pytest.raises(RuntimeError):
         flow2 = d6tflow.WorkflowMulti(params=experiment_params)
         flow2.run()
Exemplo n.º 7
0
 def test_default_task(self):
     """After set_default(TaskTrain), get_task() yields TaskTrain tasks."""
     flow2 = d6tflow.WorkflowMulti(
         params={
             'experiment1': {
                 'do_preprocess': False
             },
             'experiment2': {
                 'do_preprocess': True
             }
         })
     flow2.set_default(TaskTrain)
     out_class = flow2.get_task()
     # bug fix: the original comparison discarded its result (no assert) and
     # compared against the container itself; on a WorkflowMulti, get_task()
     # without `flow` returns one task per experiment (cf. get_task()[0]
     # usage elsewhere in this suite), so check each flow's task
     for experiment in ('experiment1', 'experiment2'):
         assert type(out_class[experiment]).__name__ == "TaskTrain"
Exemplo n.º 8
0
    def test_preview_single_flow(self):
        """preview(flow=...) prints only COMPLETE tasks and the params of the
        selected flow."""
        flow2 = d6tflow.WorkflowMulti({'experiment1': {'do_preprocess': False}, 'experiment2': {'do_preprocess': True}},
                                  task=TaskTrain)

        import io
        from contextlib import redirect_stdout

        buf = io.StringIO()
        # capture the preview's stdout so its contents can be inspected
        with buf, redirect_stdout(buf):
            flow2.preview(TaskTrain, flow='experiment1')
            printed = buf.getvalue()
            assert printed.count('PENDING') == 0
            assert printed.count('COMPLETE') == 3
            assert printed.count("'do_preprocess': 'False'") == 1
Exemplo n.º 9
0
File: main.py  Project: d6t/d6tflow
def test_path():
    """A custom `path` propagates to task outputs whether the task is used
    directly, via Workflow, or via WorkflowMulti."""
    class Task1(d6tflow.tasks.TaskPickle):
        def run(self):
            self.save({1: 1})

    class Task2(d6tflow.tasks.TaskPickle):
        def run(self):
            self.save({1: 1})

    custom_path = 'data/data2/'
    # direct task instantiation honors the path argument
    assert 'data2' in str(Task1(path=custom_path).output().path)
    # single-flow workflow honors the path argument
    single_flow = d6tflow.Workflow(Task1, path=custom_path)
    assert 'data2' in str(single_flow.get_task().output().path)
    # multi-flow workflow honors the path argument for each flow's task
    multi_flow = d6tflow.WorkflowMulti(Task2, params={0: {}}, path=custom_path)
    assert 'data2' in str(multi_flow.get_task()[0].output().path)
Exemplo n.º 10
0
            model = sklearn.ensemble.GradientBoostingRegressor()
        else:
            raise ValueError('invalid model selection')
        model.fit(df_train.drop('y', 1), df_train['y'])
        self.save(model)
        self.saveMeta(
            {'score': model.score(df_train.drop('y', 1), df_train['y'])})


# goal: compare performance of two models
# one parameter set per experiment: preprocessing flag plus model selector
params_model1 = {'do_preprocess': True, 'model': 'ols'}
params_model2 = {'do_preprocess': False, 'model': 'gbm'}

# run workflow: one flow per model, keyed by the model's short name
flow = d6tflow.WorkflowMulti(ModelTrain, {
    'ols': params_model1,
    'gbm': params_model2
})
flow.reset_upstream(confirm=False)  # force re-run (clears cached upstream outputs)
print(flow.preview('ols'))  # show the task DAG/status for the 'ols' flow

flow.run()  # execute both flows, including all upstream dependencies
'''
Scheduled 3 tasks of which:
* 3 ran successfully:
    - 1 GetData()
    - 1 ModelData(do_preprocess=True)
    - 1 ModelTrain(do_preprocess=True, model=ols)

# To run 2nd model, don't need to re-run all tasks, only the ones that changed
Scheduled 3 tasks of which:
* 1 complete ones were encountered:
Exemplo n.º 11
0
        elif self.model == 'gbm':
            model = sklearn.ensemble.GradientBoostingRegressor()

        # fit and save model with training score
        model.fit(df_trainX, df_trainY)
        self.save(model)  # persist/cache model
        self.saveMeta({'score': model.score(df_trainX,
                                            df_trainY)})  # save model score


# goal: compare performance of two models
# define workflow manager: one flow per model specification, keyed by name
flow = d6tflow.WorkflowMulti(ModelTrain, {
    'model1': {
        'model': 'ols'
    },
    'model2': {
        'model': 'gbm'
    }
})
flow.reset_upstream(confirm=False)  # DEMO ONLY: force re-run of all upstream tasks
flow.run()  # execute model training including all dependencies
'''
===== Execution Summary =====
Scheduled 2 tasks of which:
* 2 ran successfully:
    - 1 GetData()
    - 1 ModelTrain(model=ols)
This progress looks :) because there were no failed tasks or missing dependencies
'''

scores = flow.outputLoadMeta()  # load model scores saved via saveMeta in ModelTrain
Exemplo n.º 12
0
 def test_no_exp_params_passed(self):
     """Constructing WorkflowMulti without experiment params raises, with the
     library's (misspelled) message."""
     with pytest.raises(Exception) as e:
         flow2 = d6tflow.WorkflowMulti()
     # bug fix: the assertion was inside the `with` block AFTER the raising
     # statement, so it never executed; check the message once the context
     # manager has captured the exception
     assert str(e.value) == "Experments not defined"  # spelling matches library message