Exemplo n.º 1
0
def test_flow_parameters():
    """Check that ``Flow.parameters()`` equals the set of added Parameters."""
    flow = Flow(name="test")
    param_x = Parameter("x")
    param_y = Parameter("y", default=1)

    for param in (param_x, param_y):
        flow.add_task(param)

    assert flow.parameters() == {param_x, param_y}
Exemplo n.º 2
0
def create_flow() -> Flow:
    """Build and return the flow named ``FLOW_NAME``.

    The flow is created with a ``LocalEnvironment`` whose executor is a
    ``LocalDaskExecutor``. Inside the flow context the tasks are wired in
    this order: download, filter by country, enrich, aggregate, print,
    prepare for upload, and upload to S3.

    Returns:
        The assembled ``Flow`` object.
    """
    dask_environment = LocalEnvironment(executor=LocalDaskExecutor())

    with Flow(FLOW_NAME, environment=dask_environment) as covid_flow:
        # Flow parameters; their defaults come from module-level constants.
        country_param = Parameter("country", default=DEFAULT_COUNTRY)
        bucket_param = Parameter("bucket", default=DEFAULT_BUCKET)

        raw_df = download_data()
        country_df = filter_data(raw_df, country_param)
        enriched_df = enrich_data(country_df)
        summary_df = aggregate_data(enriched_df)
        print_data(summary_df)

        # The prepared result is indexed by "csv" and "filename" for upload.
        upload_payload = prepare_data_for_upload(summary_df)
        upload_to_s3(
            upload_payload["csv"],
            upload_payload["filename"],
            bucket=bucket_param,
        )

    return covid_flow
Exemplo n.º 3
0
def create_flow() -> Flow:
    """Build and return the flow named ``FLOW_NAME`` using ``LocalRun``.

    The flow loads COVID data from a file, filters it by the ``country``
    parameter, derives a label column and a feature set, scales the
    features, splits the data and checks the split. Model training and
    saving are currently disabled (see the notes at the end of the body).

    Returns:
        The assembled ``Flow`` object.
    """
    # NOTE: the author had not yet compared alternative executors.
    with Flow(FLOW_NAME, run_config=LocalRun()) as covid_flow:
        country_param = Parameter("country", default=DEFAULT_COUNTRY)

        # Alternative source, currently disabled: extract_whole_covid_data()
        raw_df = extract_covid_data_from_file()
        country_df = filter_data(raw_df, country_param)

        # Applies only to the whole data set, not to the latest-only data.
        country_full_df = extract_full_country_data(country_df)

        # Label: the 'new_cases' column, passed through clean_NaN.
        raw_labels = extract_label_column(country_full_df, 'new_cases')
        labels = clean_NaN(raw_labels)

        # Features: drop DROP_COLUMNS, clean, optimize against the labels,
        # then scale.
        raw_features = remove_overfit_columns(country_full_df, DROP_COLUMNS)
        features = clean_NaN(raw_features)
        selected_features = optimize_feature_columns(features, 10, labels)
        scaled_features = scale_data(selected_features)

        dataset_split = split_data(scaled_features, labels)
        check_data(dataset_split)

        # TODO: model training, grid_search(dataset_split), is disabled.
        # The author hit an unresolved issue with the data's format and type
        # inside the model and was unsure whether the model choice was right.
        # TODO: saving the raw data, save_data(raw_df, 'raw', 'whole'), is
        # also disabled.

    return covid_flow
Exemplo n.º 4
0
def test_copy_requires_name():
    """Calling ``Parameter.copy()`` with no argument raises ``TypeError``."""
    param = Parameter("x")
    expected_message = "required positional argument"

    with pytest.raises(TypeError, match=expected_message):
        param.copy()
Exemplo n.º 5
0
def test_copy_with_new_name():
    """A copy made with a new name has that name and slug; the original keeps its own."""
    original = Parameter("x")
    renamed = original.copy("y")

    for param, expected in ((original, "x"), (renamed, "y")):
        assert param.name == param.slug == expected
Exemplo n.º 6
0
def test_call_does_not_accept_most_args(attr):
    """Calling a Parameter with ``attr`` as a keyword raises ``TypeError``.

    Args:
        attr: Keyword name passed to the call with value ``None``.
            Presumably supplied by a parametrization or fixture that is
            not visible here -- confirm against the full test module.
    """
    param = Parameter("x")
    expected_message = "unexpected keyword argument"

    with pytest.raises(TypeError, match=expected_message):
        param(**{attr: None})
Exemplo n.º 7
0
def test_call_must_have_a_flow_out_of_context():
    """Calling a Parameter outside any ``Flow`` context raises ``ValueError``."""
    expected_message = "infer an active Flow"

    with pytest.raises(ValueError, match=expected_message):
        param = Parameter("x")
        param()
Exemplo n.º 8
0
def test_call_accepts_flow():
    """Calling a Parameter with ``flow=`` leaves that flow with one task."""
    flow = Flow("test")
    param = Parameter("x")
    param(flow=flow)

    task_count = len(flow.tasks)
    assert task_count == 1
Exemplo n.º 9
0
def test_calling_parameter_is_ok():
    """Calling a Parameter inside a ``Flow`` context leaves the flow with one task."""
    with Flow("test") as flow:
        param = Parameter("x")
        param()

    task_count = len(flow.tasks)
    assert task_count == 1
Exemplo n.º 10
0
def test_raise_error_if_two_parameters_have_same_name():
    """Adding a second Parameter named "x" to the same flow raises ``ValueError``."""
    flow = Flow(name="test")
    flow.add_task(Parameter("x"))

    # Check that the first parameter is registered before adding the duplicate.
    registered_names = [param.name for param in flow.parameters()]
    assert "x" in registered_names

    with pytest.raises(ValueError):
        flow.add_task(Parameter("x"))
Exemplo n.º 11
0
def test_create_required_parameter():
    """A Parameter created with ``required=True`` reports itself as required."""
    param = Parameter("x", required=True)
    is_required = param.required
    assert is_required
Exemplo n.º 12
0
def test_create_parameter_with_default_is_not_required():
    """A Parameter created with a default value is not required."""
    param = Parameter("x", default=2)
    is_required = param.required
    assert not is_required
Exemplo n.º 13
0
def test_parameter_slug_is_its_name():
    """A Parameter's ``name`` and ``slug`` both equal the name it was created with."""
    expected = "x"
    param = Parameter("x")
    assert param.name == param.slug == expected
Exemplo n.º 14
0
def test_create_parameter_with_default_none():
    """An explicit ``default=None`` makes the Parameter optional; ``run()`` returns ``None``."""
    param = Parameter("x", default=None)

    assert param.default is None
    assert not param.required

    result = param.run()
    assert result is None
Exemplo n.º 15
0
def test_create_parameter_with_default():
    """A Parameter with ``default=2`` exposes that default; ``run()`` returns it."""
    expected_default = 2
    param = Parameter("x", default=2)

    assert param.default == expected_default
    assert param.run() == expected_default
Exemplo n.º 16
0
def test_create_parameter():
    """A bare Parameter is a ``Task``, has a ``None`` default and is required."""
    param = Parameter("x")

    assert isinstance(param, Task)
    assert param.default is None

    is_required = param.required
    assert is_required