示例#1
0
 def test_pass_on_injected_format(self):
     """Run TTextDataTask on a ``.myext`` file whose target is built with ``config=file.txt``."""
     # The extension is not a known one, so the config is passed explicitly.
     data_path = scenario_path("data/some_unknown_ext.myext")
     text_target = target(data_path, config=file.txt)
     assert_run_task(TTextDataTask(text_data=text_target))
示例#2
0
    def test_definition_inplace_param(self):
        """Declare the parameter inline as ``parameter[int]`` and use the function both ways."""

        @task
        def t_f_call(a=parameter[int]):
            assert a == 6

        # Plain call first, then the same argument through the ``.t`` task form.
        t_f_call(a=6)
        as_task = t_f_call.t(a=6)
        assert_run_task(as_task)
示例#3
0
 def test__by_day_simple_local(self):
     """Run ByDayExamplePipeline for period "2d" with the config overrides below."""
     overrides = {
         ProductionIdsAndData.task_env: "local",
         FetchIds.task_enabled_in_prod: True,
         FetchData.task_enabled_in_prod: True,
     }
     with dbnd_config(overrides):
         pipeline = ByDayExamplePipeline(period="2d")
         assert_run_task(pipeline)
示例#4
0
 def test_params_inherited_parse(self):
     """Run DummyWrapper with DummyTask parameters overridden by string values."""
     # Values are given as strings ("0.1", "4d"); presumably the parameters
     # parse them into their own types - TODO confirm against DummyTask.
     string_overrides = {
         DummyTask.expected_param: "0.1",
         DummyTask.expected_timedelta_param: "4d",
     }
     wrapper = DummyWrapper(override=string_overrides)
     assert_run_task(wrapper)
示例#5
0
    def test_simple_defaults(self):
        """Use a decorated function with no arguments so its default ``a=5`` applies."""

        @task
        def t_f_defaults(a=5):
            assert a == 5

        # Plain call first, then the ``.t`` task form; neither passes ``a``.
        t_f_defaults()
        as_task = t_f_defaults.t()
        assert_run_task(as_task)
    def test_custom_decorator_usage(self):
        """Run an experiment built with ``my_experiment`` that runs another experiment inside it."""

        @my_experiment
        def run_splits(previous_exp=1):
            logging.warning("Running splits!!!  %s", previous_exp)
            return 1, 2, 1

        @my_experiment
        def my_experiement(alpha=0.2, previous_exp=1):
            logging.warning("My previous exp = %s", previous_exp)

            # Build and run the inner experiment; its result becomes the
            # third element of this experiment's return value.
            logging.warning(" Running some splits")
            splits_task = run_splits.t()
            splits_task.dbnd_run()
            logging.warning(" Done some splits")

            return 1, 2, splits_task.result.read_pickle()

        outer_exp = my_experiement.t(alpha=0.4)

        # We can't support creating the same task under different dags:
        # the second time, the dag will not be added to the task.
        # outer_exp2 = my_experiement.t(previous_exp=outer_exp.my_ratio)
        assert_run_task(outer_exp)
示例#7
0
 def test_custom_partition_from_ctor(self):
     """Pass a custom ``task_output_path_format`` to TTask and check the output path."""
     path_format = (
         "{root}/{env_label}/{task_family}{task_class_version}_custom/"
         "{output_name}{output_ext}/date={task_target_date}"
     )
     custom_task = TTask(task_output_path_format=path_format)
     assert_run_task(custom_task)

     # The "_custom" suffix from the format must show up in the output path.
     output_path = str(custom_task.t_output)
     assert "TTask_custom/t_output.csv/" in output_path
示例#8
0
    def test_generated_output_dict(self):
        """Run a task whose ``splits`` output is built by a custom ``output_factory``."""

        def _get_all_splits(task, task_output):
            # type: (Task, ParameterBase) -> dict
            # Maps "part_<i>" to a (train, test) pair of partitions, one
            # entry per index in range(task.parts).
            base = task_output.build_target(task)
            part_names = ["part_%s" % idx for idx in range(task.parts)]
            return {
                part: (
                    base.partition(name="train_%s" % part),
                    base.partition(name="test_%s" % part),
                )
                for part in part_names
            }

        class TGeneratedOutputs(PythonTask):
            parts = parameter.value(3)
            splits = output.csv.folder(output_factory=_get_all_splits)

            def run(self):
                # Write the part name into both partitions of every split.
                for key, (train, test) in self.splits.items():
                    train.write(key)
                    test.write(key)

        generated = TGeneratedOutputs()
        assert_run_task(generated)
示例#9
0
    def test_spark_inline_same_context(self):
        """Run ``word_count_inline`` on this file inside an already created SparkSession.

        The session is used as a context manager around the run, and
        ``enable_spark_context_inplace`` is switched on for its duration.
        """
        from pyspark.sql import SparkSession
        from dbnd_test_scenarios.spark.spark_tasks_inline import word_count_inline

        # The session object itself is never referenced, so it is not bound
        # to a name (the original bound it to an unused ``sc``).
        with SparkSession.builder.getOrCreate():
            with config({SparkLocalEngineConfig.enable_spark_context_inplace: True}):
                assert_run_task(word_count_inline.t(text=__file__))
示例#10
0
    def test_inline_call_with_inline_band(self, target_1_2):
        """Chain band -> task -> band -> task -> ``t_f_b`` through direct calls and run the outer band."""

        @task
        def t_f_2nd(a):
            # also, no typing
            return t_f_b(a)

        @band
        def t_f_inline_band(a):
            # also, no typing
            return t_f_2nd(a)

        @task
        def t_f_1st(a):
            # type: (DataList[str])-> List[str]
            values = t_f_inline_band(a)
            assert values == ["s_1", "s_2"]
            return values

        @band
        def t_f_band(a):
            # type: (DataList[str])-> FileTarget
            produced = t_f_1st(a)
            assert isinstance(produced, FileTarget)
            return produced

        outer_band = t_f_band.t(a=target_1_2)
        assert_run_task(outer_band)
示例#11
0
    def test_simple_no_call(self):
        """Use the bare ``@task`` decorator form and override the default ``a=5`` with 6."""

        @task
        def t_f_nocall(a=5):
            assert a == 6

        # Plain call first, then the same argument through the ``.t`` task form.
        t_f_nocall(a=6)
        as_task = t_f_nocall.t(a=6)
        assert_run_task(as_task)
示例#12
0
    def test_deco_ret_task(self):
        """Run a ``@band`` function that returns a TTask object."""

        @band
        def ret_dict():
            # Despite the name, a single TTask is returned, not a dict.
            return TTask(t_param=1)

        band_task = ret_dict.t()
        assert_run_task(band_task)
示例#13
0
    def test_task_artifacts(self, matplot_figure, tmpdir):
        """Log a file, a figure and a directory as artifacts and check they are listed.

        After the run, the entries returned by ``task._meta_output.list_partitions()``
        must include a base name containing each of "my_tmp_file", "my_figure"
        and "sub_file" (the file nested inside the logged directory).
        """
        lorem = "Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt\n"
        data = tmpdir / "data.txt"
        data.write(lorem)

        artifact_dir = tmpdir.mkdir("dir")
        sub_file = artifact_dir.mkdir("subdir").join("sub_file")
        sub_file.write(lorem)

        class TTaskArtifacts(TTask):
            def run(self):
                self.log_artifact("my_tmp_file", str(data))
                self.log_artifact("my_figure", matplot_figure)
                # The directory is passed with a trailing slash, as before.
                self.log_artifact("my_dir", str(artifact_dir) + "/")
                super(TTaskArtifacts, self).run()

        task = TTaskArtifacts()
        assert_run_task(task)

        # Compute the base names once and check each expected artifact
        # against them, naming the missing artifact on failure.
        logged_names = [
            os.path.basename(str(partition))
            for partition in task._meta_output.list_partitions()
        ]
        for expected in ("my_tmp_file", "my_figure", "sub_file"):
            assert any(
                expected in name for name in logged_names
            ), "artifact %r not found in %r" % (expected, logged_names)
示例#14
0
    def test_simple_with_call(self):
        """Use the called decorator form ``@task()`` and override the default ``a=5`` with 6."""

        @task()
        def t_f_call(a=5):
            assert a == 6

        # Plain call first, then the same argument through the ``.t`` task form.
        t_f_call(a=6)
        as_task = t_f_call.t(a=6)
        assert_run_task(as_task)
示例#15
0
    def test_definition_inplace_output(self):
        """Declare an output inline as ``output[Target]`` and write to it from the function."""

        @task
        def t_f_call(a=parameter[int], f_output=output[Target]):
            f_output.write(str(a))
            return None

        # Only ``a`` is supplied; ``f_output`` is left to its declaration.
        as_task = t_f_call.t(a=6)
        assert_run_task(as_task)
示例#16
0
    def test_task_input_via_band1(self, file_on_disk):
        """Run a pipeline whose band assigns a TTaskWithInput, fed a file path, to its output."""

        class TTAskWithInputTask1(PipelineTask):
            t_output = output.data

            def band(self):
                # The whole inner task is assigned to the pipeline output.
                inner = TTaskWithInput(t_input=file_on_disk.path)
                self.t_output = inner

        pipeline = TTAskWithInputTask1()
        assert_run_task(pipeline)
示例#17
0
 def test_wine_quality_deco_simple_all(self):
     """Run ``predict_wine_quality`` with alpha=0.5 and ``fetch_data`` env overridden to "local_prod"."""
     env_override = {
         wine_quality_decorators.fetch_data.t.task_env: "local_prod",
     }
     predict = wine_quality_decorators.predict_wine_quality.t(
         alpha=0.5, override=env_override
     )
     assert_run_task(predict)
示例#18
0
 def test_wine_quality_deco_simple_all(self):
     """Define a "local_prod" config section, then run ``predict_wine_quality`` against it."""
     local_prod_section = {
         "_from": "local",
         "env_label": "prod",
         "production": True,
     }
     with dbnd_config({"local_prod": local_prod_section}):
         # The task is built inside the config context, as in the original.
         env_override = {wine_quality.fetch_data.t.task_env: "local_prod"}
         predict = wine_quality.predict_wine_quality.t(
             alpha=0.5, override=env_override
         )
         assert_run_task(predict)
示例#19
0
    def test_inline_call_with_res(self, target_1_2):
        """Call ``t_f_b`` directly inside a task and check the value it returns."""

        @task
        def t_f_parent(a):
            # type: (DataList[str])-> None
            values = t_f_b(a)
            assert values == ["s_1", "s_2"]

        parent_task = t_f_parent.t(a=target_1_2)
        assert_run_task(parent_task)
示例#20
0
 def test_prod_immutable_output_example(self):
     """Run ProductionIdsAndData in a clone of the current env with ``production=True``."""
     enabled_in_prod = {
         FetchIds.task_enabled_in_prod: True,
         FetchData.task_enabled_in_prod: True,
     }
     with dbnd_config(enabled_in_prod):
         # The env is cloned inside the config context, as in the original.
         prod_env = get_databand_context().env.clone(production=True)
         assert_run_task(ProductionIdsAndData(task_env=prod_env))
示例#21
0
 def test_word_count_inline(self):
     """Run ``word_count_inline`` on TEXT_FILE under the ``disable_tracker_api()`` config."""
     with dbnd_config(disable_tracker_api()):
         # A random task_version is used on every run; presumably so an
         # earlier run's result is not reused - TODO confirm.
         version = str(random.random())
         word_count = word_count_inline.t(
             text=TEXT_FILE, task_version=version, override=conf_override
         )
         assert_run_task(word_count)
示例#22
0
 def test_wine_quality_deco_search(self):
     """Run the parameter search with alpha_step=0.5 and ``data`` overridden to ``data_repo.wines``."""
     data_override = {
         wine_quality_decorators.predict_wine_quality.t.data: data_repo.wines,
     }
     search = wine_quality_decorators.predict_wine_quality_parameter_search.t(
         alpha_step=0.5, override=data_override
     )
     assert_run_task(search)
示例#23
0
    def test_custom_parition(self):
        """Set a custom ``_conf__base_output_path_fmt`` on a TTask subclass and check the output path."""
        custom_fmt = (
            "{root}/{env_label}/{task_family}{task_class_version}_custom/"
            "{output_name}{output_ext}/date={task_target_date}"
        )

        class CustomOutputsTTask(TTask):
            _conf__base_output_path_fmt = custom_fmt

        custom_task = CustomOutputsTTask()
        assert_run_task(custom_task)

        # The "_custom" suffix from the format must show up in the output path.
        output_path = str(custom_task.t_output)
        assert "CustomOutputsTTask_custom/t_output.csv/" in output_path
示例#24
0
    def test_input_filename(self, target_1_2):
        """Check that a ``PathStr``-typed argument arrives as the target's path."""

        @task
        def t_f_path(a):
            # type: (PathStr) -> str
            assert target_1_2.path == a
            return a

        # The plain call gets the path itself; the task form gets the target.
        t_f_path(a=target_1_2.path)
        as_task = t_f_path.t(a=target_1_2)
        assert_run_task(as_task)
示例#25
0
    def test_run_all_task(self):
        """Build ``f_test_flow`` as a task, run it, and check that its result loads as "OK"."""
        # This code runs outside a @band context, so .task() has to be stated
        # explicitly; otherwise the function would be executed in place.
        # 'flow' is therefore a Task object: the definition of the pipeline,
        # not yet executed.
        flow = T22_function_with_different_inputs.f_test_flow.task()

        assert_run_task(flow)
        assert flow.result.load(str) == "OK"
示例#26
0
    def test_word_count_inline(self):
        """Run ``word_count_inline`` on the text configured under ``livy_tests`` / ``text``."""
        from dbnd_test_scenarios.spark.spark_tasks_inline import word_count_inline

        # A random task_version is used on every run; presumably so an
        # earlier run's result is not reused - TODO confirm.
        word_count = word_count_inline.t(
            text=config.get("livy_tests", "text"),
            task_version=str(random.random()),
            override=conf_override,
        )
        assert_run_task(word_count)
示例#27
0
    def test_io(self):
        """Run ``dataframes_io_pandas_spark`` on TEXT_FILE with the shared ``conf_override``."""
        from dbnd_test_scenarios.spark.spark_io_inline import dataframes_io_pandas_spark

        # A random task_version is used on every run; presumably so an
        # earlier run's result is not reused - TODO confirm.
        io_task = dataframes_io_pandas_spark.t(
            text=TEXT_FILE,
            task_version=str(random.random()),
            override=conf_override,
        )
        assert_run_task(io_task)
示例#28
0
    def test_band_ret_task(self):
        """Run a pipeline whose band assigns a dict of TTask outputs to its single output."""

        class TMultipleOutputsPipeline(PipelineTask):
            t_types = parameter.value([1, 2])
            t_output = output

            def band(self):
                # One TTask per entry in t_types, keyed by that entry.
                outputs_by_type = {}
                for t_type in self.t_types:
                    outputs_by_type[t_type] = TTask(t_param=t_type).t_output
                self.t_output = outputs_by_type

        pipeline = TMultipleOutputsPipeline()
        assert_run_task(pipeline)
示例#29
0
    def test_word_count_inline(self):
        """Run ``word_count_inline`` on TEXT_FILE with the shared ``conf_override``."""
        from dbnd_test_scenarios.spark.spark_tasks_inline import word_count_inline

        # A random task_version is used on every run; presumably so an
        # earlier run's result is not reused - TODO confirm.
        word_count = word_count_inline.t(
            text=TEXT_FILE,
            task_version=str(random.random()),
            override=conf_override,
        )
        assert_run_task(word_count)
示例#30
0
    def test_spark_inline_same_context(self):
        """Run ``word_count_inline`` on this file inside an already created SparkSession.

        The session is used as a context manager around the run, and
        ``enable_spark_context_inplace`` is switched on for its duration.
        """
        from pyspark.sql import SparkSession

        from dbnd_examples.orchestration.dbnd_spark.word_count import word_count_inline
        from dbnd_spark.local.local_spark_config import SparkLocalEngineConfig

        # The session object itself is never referenced, so it is not bound
        # to a name (the original bound it to an unused ``sc``).
        with SparkSession.builder.getOrCreate():
            with config({SparkLocalEngineConfig.enable_spark_context_inplace: True}):
                task_instance = word_count_inline.t(text=__file__)
                assert_run_task(task_instance)