def test_flow_parameters():
    """A flow reports exactly the Parameter tasks that were added to it."""
    flow = Flow(name="test")
    plain_param = Parameter("x")
    defaulted_param = Parameter("y", default=1)

    # Register the parameters in the same order: "x" first, then "y".
    for param in (plain_param, defaulted_param):
        flow.add_task(param)

    assert flow.parameters() == {plain_param, defaulted_param}
def create_flow() -> Flow:
    """Assemble the flow and return it.

    The flow is named ``FLOW_NAME`` and is given a ``LocalEnvironment``
    whose executor is a ``LocalDaskExecutor``. It exposes two parameters,
    ``country`` and ``bucket``, defaulting to ``DEFAULT_COUNTRY`` and
    ``DEFAULT_BUCKET`` respectively.

    Returns:
        The constructed ``Flow`` with all tasks wired together.
    """
    with Flow(
        FLOW_NAME,
        environment=LocalEnvironment(executor=LocalDaskExecutor()),
    ) as pipeline:
        country_param = Parameter("country", default=DEFAULT_COUNTRY)
        bucket_param = Parameter("bucket", default=DEFAULT_BUCKET)

        # Each task consumes the result of the previous one.
        raw_df = download_data()
        country_df = filter_data(raw_df, country_param)
        enriched_df = enrich_data(country_df)
        summary_df = aggregate_data(enriched_df)
        print_data(summary_df)

        # The upload task receives the "csv" and "filename" entries of the
        # prepared result, in that order.
        upload_payload = prepare_data_for_upload(summary_df)
        csv_body = upload_payload["csv"]
        csv_filename = upload_payload["filename"]
        upload_to_s3(csv_body, csv_filename, bucket=bucket_param)

    return pipeline
def create_flow() -> Flow:
    """Build and return the flow object.

    The flow is named ``FLOW_NAME`` and uses a ``LocalRun`` run config. It
    exposes a single ``country`` parameter defaulting to ``DEFAULT_COUNTRY``
    and wires the tasks up to the train/test split and its data check.

    Returns:
        The constructed ``Flow``.
    """
    # NOTE: the choice of executor has not been explored yet; only the run
    # config is set here.
    local_run_config = LocalRun()

    with Flow(FLOW_NAME, run_config=local_run_config) as pipeline:
        country_param = Parameter("country", default=DEFAULT_COUNTRY)

        # NOTE: extract_whole_covid_data() is the alternative source; the
        # file-based extract is the one currently in use.
        raw_df = extract_covid_data_from_file()
        country_df = filter_data(raw_df, country_param)

        # Only for whole data, not latest.
        country_full_df = extract_full_country_data(country_df)

        # Label column first, then the feature columns.
        labels_raw = extract_label_column(country_full_df, 'new_cases')
        labels = clean_NaN(labels_raw)

        features_raw = remove_overfit_columns(country_full_df, DROP_COLUMNS)
        features_clean = clean_NaN(features_raw)
        features_selected = optimize_feature_columns(features_clean, 10, labels)
        features_scaled = scale_data(features_selected)

        split_sets = split_data(features_scaled, labels)
        check_data(split_sets)

        # TODO: model training via grid_search(train_test_data) is disabled.
        # The data's format/type causes an unresolved issue while being
        # processed within the model; the model choice itself may be wrong.
        # NOTE: the print_head / check_for_infinity debugging calls and
        # save_data(covid_df, 'raw', 'whole') are likewise left disabled.

    return pipeline
def test_copy_requires_name():
    """Calling ``Parameter.copy`` with no arguments raises ``TypeError``."""
    param = Parameter("x")
    expects_type_error = pytest.raises(
        TypeError, match="required positional argument"
    )

    with expects_type_error:
        param.copy()
def test_copy_with_new_name():
    """Copying under a new name leaves the original's name and slug intact."""
    source = Parameter("x")
    renamed = source.copy("y")

    # Original is checked first, then the copy; name and slug must both
    # equal the expected identifier.
    for param, expected in ((source, "x"), (renamed, "y")):
        assert param.name == param.slug == expected
def test_call_does_not_accept_most_args(attr):
    """Calling a Parameter with the keyword named by ``attr`` raises TypeError.

    Args:
        attr: Name of the keyword argument to pass (with value ``None``);
            supplied from outside this function.
    """
    param = Parameter("x")

    with pytest.raises(TypeError, match="unexpected keyword argument"):
        rejected_kwargs = {attr: None}
        param(**rejected_kwargs)
def test_call_must_have_a_flow_out_of_context():
    """Calling a Parameter outside any flow context raises ``ValueError``."""
    expects_value_error = pytest.raises(ValueError, match="infer an active Flow")

    with expects_value_error:
        # Construction stays inside the block, as does the call itself.
        param = Parameter("x")
        param()
def test_call_accepts_flow():
    """Calling a Parameter with an explicit ``flow=`` adds one task to it."""
    target_flow = Flow("test")
    param = Parameter("x")

    param(flow=target_flow)

    task_count = len(target_flow.tasks)
    assert task_count == 1
def test_calling_parameter_is_ok():
    """Calling a Parameter inside a flow context adds one task to that flow."""
    with Flow("test") as active_flow:
        param = Parameter("x")
        param()

    task_count = len(active_flow.tasks)
    assert task_count == 1
def test_raise_error_if_two_parameters_have_same_name():
    """Adding a second Parameter named like an existing one raises ValueError."""
    flow = Flow(name="test")
    flow.add_task(Parameter("x"))

    # Sanity check: the first parameter is registered under its name.
    registered_names = {param.name for param in flow.parameters()}
    assert "x" in registered_names

    with pytest.raises(ValueError):
        duplicate = Parameter("x")
        flow.add_task(duplicate)
def test_create_required_parameter():
    """A Parameter created with ``required=True`` reports itself as required."""
    param = Parameter("x", required=True)
    is_required = param.required
    assert is_required
def test_create_parameter_with_default_is_not_required():
    """A Parameter created with a default value is not required."""
    param = Parameter("x", default=2)
    is_required = param.required
    assert not is_required
def test_parameter_slug_is_its_name():
    """A Parameter's slug and name are both the name it was created with."""
    expected = "x"
    param = Parameter("x")
    assert param.name == param.slug == expected
def test_create_parameter_with_default_none():
    """An explicit ``default=None`` makes the Parameter optional.

    The default is stored as ``None``, the parameter is not required, and
    running it yields ``None``.
    """
    param = Parameter("x", default=None)

    assert param.default is None
    assert not param.required

    run_result = param.run()
    assert run_result is None
def test_create_parameter_with_default():
    """A Parameter stores its default and returns it from ``run()``."""
    param = Parameter("x", default=2)

    assert param.default == 2

    run_result = param.run()
    assert run_result == 2
def test_create_parameter():
    """A bare Parameter is a Task, has no default, and is required."""
    param = Parameter("x")

    is_task = isinstance(param, Task)
    assert is_task
    assert param.default is None
    assert param.required