def test_misspelled_task_suggestion(self):
    """Running a truncated task name must fail and mention the full name.

    The command uses ``dbnd_sanity_che``; the raised
    ``TaskClassNotFoundException`` is expected to match ``dbnd_sanity_check``.
    """
    misspelled_cmd = ["dbnd_sanity_che", "-r", "x=5"]
    expected_failure = pytest.raises(
        TaskClassNotFoundException, match="dbnd_sanity_check"
    )
    with expected_failure as exc_info:
        dbnd_run_cmd(misspelled_cmd)
    logger.info("exc_info: %s", exc_info)
def test_type_hints_cmdline(self, pandas_data_frame):
    """Pass string values on the command line to a task typed via a type comment.

    The task body asserts that each argument arrives as the value described
    by the ``# type:`` comment (str, datetime, timedelta, int).
    """
    # Persist the fixture frame to a parquet target before the run.
    self.target("file.parquet").write_df(pandas_data_frame)

    @task
    def t_f_cmd_hints(a_str, b_datetime, c_timedelta, d_int):
        # type: (str, datetime.datetime, datetime.timedelta, int) -> DataFrame
        assert a_str == "1"
        assert b_datetime.isoformat() == "2018-01-01T10:10:10.100000+00:00"
        assert c_timedelta == datetime.timedelta(days=5)
        assert d_int == 1
        return pandas_data_frame

    # Every parameter is supplied through its own "-r key=value" pair.
    cli_values = (
        "a_str=1",
        "b_datetime=2018-01-01T101010.1",
        "c_timedelta=5d",
        "d_int=1",
    )
    run_args = ["t_f_cmd_hints"]
    for cli_value in cli_values:
        run_args.extend(["-r", cli_value])
    dbnd_run_cmd(run_args)
def test_submit_driver_req(self, mock_client):
    """Check the single pod-creation request issued for the driver run.

    Runs ``dbnd_sanity_check`` against the ``gcp_k8s`` env under
    ``K8S_CONFIG`` and inspects the keyword arguments passed to the mocked
    ``create_namespaced_pod``.
    """
    run_args = [
        "dbnd_sanity_check",
        "--env=gcp_k8s",
        "--set-config",
        "kubernetes.container_tag=tag",
    ]
    with dbnd_config(K8S_CONFIG):
        dbnd_run_cmd(run_args)

    pod_requests = mock_client().create_namespaced_pod.call_args_list
    assert len(pod_requests) == 1
    pod_body = pod_requests[0].kwargs["body"]

    # 1) test - default labels
    expected_labels = {
        "dbnd_task_family": "d.d.d.docker-run-task",
        "dbnd_task_name": "dbnd-driver-run",
        "dbnd_task_af_id": "dbnd-driver-run",
        "dbnd": "dbnd-system-task-run",
    }
    assert is_sub_dict(pod_body["metadata"]["labels"], expected_labels)

    # 2) test - running the driver with the global resources
    expected_resources = {
        "limits": {"test_limits": 1},
        "requests": {"memory": "1536Mi", "cpu": "1"},
    }
    driver_container = pod_body["spec"]["containers"][0]
    assert driver_container["resources"] == expected_resources
def test_type_hints_from_defaults_cmdline(self, pandas_data_frame):
    """Pass string values on the command line to a task typed only by defaults.

    The task has no annotations; its parameters carry default values of the
    expected types, and the body asserts the values received from the CLI.
    """
    # Persist the fixture frame to a parquet target before the run.
    self.target("file.parquet").write_df(pandas_data_frame)

    @task
    def t_f_defaults_cmdline(
        a_str="",
        b_datetime=datetime.datetime.utcnow(),
        c_timedelta=datetime.timedelta(),
        d_int=0,
    ):
        assert a_str == "1"
        assert b_datetime.isoformat() == "2018-01-01T10:10:10.100000+00:00"
        assert c_timedelta == datetime.timedelta(days=5)
        assert d_int == 1
        return pandas_data_frame

    # Every parameter is supplied through its own "-r key=value" pair.
    cli_values = (
        "a_str=1",
        "b_datetime=2018-01-01T101010.1",
        "c_timedelta=5d",
        "d_int=1",
    )
    run_args = ["t_f_defaults_cmdline"]
    for cli_value in cli_values:
        run_args.extend(["-r", cli_value])
    dbnd_run_cmd(run_args)
def test_cli_with_defaults(self):
    """
    Run ``TConfigTask`` from the command line with ``t_param`` supplied
    through a ``--set`` config override.
    """
    cli_args = ["TConfigTask", "--set", "TConfigTask.t_param=123"]
    dbnd_run_cmd(cli_args)
def test_dynamic_loading(self):
    """Run ``DynamicImportTask`` from a module named via ``--module``."""
    module_option = ["--module", "test_dbnd.scenarios.do_not_import"]
    task_and_params = ["DynamicImportTask", "-r", "x=123"]
    dbnd_run_cmd(module_option + task_and_params)
def test_describe_double_verbose(self):
    """Run ``FooBaseTask`` with ``--describe`` and ``--verbose`` given twice."""
    task_and_params = ["FooBaseTask", "-r", "t_param=hello"]
    verbosity_flags = ["--verbose"] * 2
    args = task_and_params + verbosity_flags + ["--describe"]
    dbnd_run_cmd(args)
def test_cli_no_default(self):
    """
    Run ``TConfigNoDefault`` with ``t_param_no_default`` supplied as a JSON
    document passed to ``--set``.
    """
    overrides = {"TConfigNoDefault": {"t_param_no_default": "123"}}
    set_conf = json.dumps(overrides)
    cli_args = ["TConfigNoDefault", "--set", set_conf]
    dbnd_run_cmd(cli_args)
def testWithNamespaceCli(self):
    """Address a namespaced task as ``mynamespace.A`` on the command line."""

    class A(TTask):
        task_namespace = "mynamespace"
        p1 = parameter.value(100)
        expected = parameter[int]

        def complete(self):
            # The task only counts as complete when p1 equals `expected`.
            if self.p1 == self.expected:
                return True
            raise ValueError

    commands = (
        "mynamespace.A -r expected=100",
        "mynamespace.A -r p1=200 -r expected=200",
    )
    for command in commands:
        assert dbnd_run_cmd(command)
def testListWithNamespaceCli(self):
    """Pass list-valued parameters to a namespaced task on the command line."""

    class A(TTask):
        task_namespace = "mynamespace"
        l_param = parameter.value([1, 2, 3])
        expected = parameter[List[int]]

        def complete(self):
            # The task only counts as complete when l_param equals `expected`.
            if self.l_param == self.expected:
                return True
            raise ValueError

    commands = (
        "mynamespace.A -r expected=[1,2,3]",
        "mynamespace.A -r l_param=[1,2,3] -r expected=[1,2,3]",
    )
    for command in commands:
        assert dbnd_run_cmd(command)
def testTupleWithNamespaceCli(self):
    """Pass nested-tuple parameters to a namespaced task on the command line."""

    class A(TTask):
        task_namespace = "mynamespace"
        t = parameter.value(((1, 2), (3, 4)))
        expected = parameter[Tuple]

        def complete(self):
            # The task only counts as complete when t equals `expected`.
            if self.t == self.expected:
                return True
            raise ValueError

    commands = (
        "mynamespace.A -r expected=((1,2),(3,4))",
        "mynamespace.A -r t=((1,2),(3,4)) -r expected=((1,2),(3,4))",
    )
    for command in commands:
        assert dbnd_run_cmd(command)
def test_simple_cli(self):
    """Run ``t_d_1`` mixing ``-r`` parameters with a JSON ``--set`` override."""
    d_int_override = json.dumps({"t_d_1": {"d_int": 1}})
    # NOTE(review): the leading space in " c_timedelta=5d" is kept exactly as
    # found; confirm whether it is intentional.
    cli_args = [
        "t_d_1",
        "-r",
        "a_str=1",
        "-r",
        "b_datetime=2018-01-01T101010.1",
        "-r",
        " c_timedelta=5d",
        "--set",
        d_int_override,
    ]
    dbnd_run_cmd(cli_args)
def dbnd_run_task_with_output(
    self, run_args, task=TTask, output_parameter=TTask.t_output, call_f=dbnd_run_cmd
):
    """Run ``TTask`` with its output redirected to a temp file and verify it.

    The command line is ``TTask``'s full task family, a ``--set`` override
    pointing ``TTask.t_output`` at ``<tmpdir>/output_file.txt``, followed by
    ``run_args``.

    Args:
        run_args: extra command-line arguments appended after the override.
        task: currently unused (see note below).
        output_parameter: currently unused (see note below).
        call_f: callable invoked with the assembled argument list; defaults
            to ``dbnd_run_cmd``.

    Raises:
        AssertionError: if the output file does not exist after the run.
    """
    # NOTE(review): `task` and `output_parameter` are accepted but the command
    # is still built for TTask / "TTask.t_output"; confirm how the config key
    # should be derived before wiring them in.
    local_file = str(self.tmpdir.join("output_file.txt"))
    # list() lets callers pass any sequence (e.g. a tuple) of extra arguments.
    run_args = [
        TTask.task_definition.full_task_family,
        "--set",
        "TTask.t_output=%s" % local_file,
    ] + list(run_args)
    logging.info("Running command:%s", subprocess.list2cmdline(run_args))
    # Use the caller-supplied runner instead of hard-coding dbnd_run_cmd, so
    # the `call_f` argument actually takes effect.
    call_f(run_args)
    assert os.path.exists(local_file), (
        "Output file %s wasn't created by task!" % local_file
    )
def test_parallel_local_executor(self):
    """Run ``ParallelTasksPipeline`` with ``--parallel``.

    When the configured SQLAlchemy connection string contains ``sqlite`` the
    run is expected to raise ``DatabandConfigError``; otherwise it must run.
    """
    cmd = [
        "-m",
        ParallelTasksPipeline.__module__,
        ParallelTasksPipeline.get_task_family(),
        "--parallel",
        "-r",
        "num_of_tasks=2",
    ]
    if "sqlite" not in settings.SQL_ALCHEMY_CONN:
        dbnd_run_cmd(cmd)
        return

    # not supported on sqlite
    with pytest.raises(DatabandConfigError):
        dbnd_run_cmd(cmd)
def test_build_task_with_task_band_from_cli(self, task_band_file):
    """Compare the ``First`` task result with and without a ``task_band``.

    Without the band the loaded result is the CLI input (3); with
    ``First.task_band`` pointing at ``task_band_file`` the result is 1.
    """
    base_args = ("First", "--set", "First.input_1=3")

    plain_run = dbnd_run_cmd(list(base_args))
    assert plain_run.run_executor.result.load("result") == 3

    band_override = "First.task_band={path}".format(path=task_band_file.strpath)
    banded_run = dbnd_run_cmd(list(base_args) + ["--set", band_override])
    # accessing the result and check that the used value is the one from the task_band
    assert banded_run.run_executor.result.load("result") == 1
def test_run_with_overridden_run_uid(self):
    """A run started with ``--override-run-uid`` must carry that uid.

    The run is also expected to report ``existing_run`` as ``False``.
    """
    expected_run_uid = str(uuid.uuid1())
    cli_args = ["dbnd_sanity_check", "--override-run-uid", expected_run_uid]
    my_run = dbnd_run_cmd(cli_args)

    actual_run_uid = str(my_run.run_uid)
    assert actual_run_uid == expected_run_uid
    assert my_run.existing_run is False
def test_sub_task(self):
    """Set pipeline parameters via ``-r`` and a child-task parameter via ``-s``.

    ``TCmdPipe`` must receive ``x``/``y`` from ``-r`` while its ``TCmdTask``
    child receives ``y`` from the ``-s TCmdTask.y=xyz`` override.
    """
    cli_args = ["TCmdPipe", "-r", "x=foo", "-r", "y=bar", "-s", "TCmdTask.y=xyz"]
    run = dbnd_run_cmd(cli_args)
    pipeline = run.task  # type: TCmdPipe

    assert pipeline.x == "foo"
    assert pipeline.y == "bar"

    matching_children = pipeline.task_dag.select_by_task_names("TCmdTask")
    t_cmd = matching_children[0]
    assert t_cmd.y == "xyz"
def test_specific_takes_precedence(self):
    """The ``-o MyTask.param=6`` value must win over ``-r param=5``."""

    class MyTask(TTask):
        param = parameter[int]

        def run(self):
            super(MyTask, self).run()
            # The value given with -o is the one the task must observe.
            assert self.param == 6

    command = "MyTask -r param=5 -o MyTask.param=6"
    assert dbnd_run_cmd(command)
def test_local_params(self):
    """Both ``-r`` parameters must reach the task: an int and a truthy flag."""

    class MyTask(TTask):
        param1 = parameter[int]
        param2 = parameter.value(default=False)

        def run(self):
            super(MyTask, self).run()
            assert self.param1 == 1
            assert self.param2

    command = "MyTask -r param1=1 -r param2=True"
    assert dbnd_run_cmd(command)
def test_auto_load(self):
    """Run ``task_auto_config`` under a config that names user configs/init."""
    autoloaded_module = "test_dbnd.orchestration.config.autoloaded_config"
    config_layer = {
        "autotestconfig": {"param_datetime": "2018-01-01", "param_int": "42"},
        "core": {
            "user_configs": "autotestconfig",
            "user_init": "test_dbnd.orchestration.config.autoloaded_config.user_code_load_config",
        },
        "databand": {"module": autoloaded_module},
    }
    with dbnd_config(config_layer):
        dbnd_run_cmd("task_auto_config")
def test_use_config_class_with_configuration(self):
    """``MyConfig`` values given as JSON via ``--set`` show up in the output.

    The task output loaded as an object is expected to be ``[123, 345]``.
    """
    my_config_values = {"MyConfig": {"mc_p": "123", "mc_q": "345"}}
    cli_args = ["MyConfigTester", "--set", json.dumps(my_config_values)]
    result = dbnd_run_cmd(cli_args)

    actual = result.task.t_output.load(object)
    assert actual == [123, 345]
def test_param_override_2(self):
    """Values passed with ``-o`` must win over the ones passed with ``-s``.

    The task output loaded as an object is expected to be ``[222, 223]``.
    """
    set_values = "{'MyConfig.mc_p': '999', 'MyConfig.mc_q': '888'}"
    override_values = "{'MyConfig.mc_p': '222', 'MyConfig.mc_q': '223'}"
    cli_args = ["MyConfigTester", "-s", set_values, "-o", override_values]
    result = dbnd_run_cmd(cli_args)

    actual = result.task.t_output.load(object)
    assert actual == [222, 223]
def test_generated_command_line(self):
    """The command line stored in ``task_meta`` must be runnable as-is.

    Also checks that it starts with ``dbnd run`` and mentions neither the
    output (``t_output``) nor the defaulted ``date-param``.
    """
    # NOTE(review): a second method with this same name is visible further
    # down; if both live in one class, this definition is shadowed.
    t = TaskInfoParamsTask(str_param=15, num_param=12, list_param=[1, 2, 3])
    cmd_line_as_str = t.task_meta.task_command_line
    cmd_line = shlex.split(cmd_line_as_str)

    assert cmd_line_as_str.startswith("dbnd run")
    # check that outputs are filtered out
    assert "t_output" not in cmd_line_as_str
    # check that defaults are filtered out
    assert "date-param" not in cmd_line_as_str

    # Drop the leading "dbnd run" tokens and execute the remainder.
    args_after_dbnd_run = cmd_line[2:]
    assert dbnd_run_cmd(args_after_dbnd_run)
def test_from_extra_config(self):
    """A parameter value must be picked up from a file given by ``--conf-file``."""

    class MyTaskWithConfg(TTask):
        parameter_with_config = parameter[str]

        def __init__(self, **kwargs):
            super(MyTaskWithConfg, self).__init__(**kwargs)

        def run(self):
            super(MyTaskWithConfg, self).run()
            assert self.parameter_with_config == "value_from_config"

    conf_file = scenario_path("config_files/test_cfg_switch.cfg")
    command = "MyTaskWithConfg --conf-file %s" % conf_file
    assert dbnd_run_cmd(command)
def test_cli_raises1(self):
    """
    Pass ``TConfigTask.t_param`` twice with ``-s`` in both orders: the run
    with 123 given last must succeed, and the run with 124 given last is
    expected to raise ``DatabandRunError``.
    """
    ends_with_123 = [
        "TConfigTask",
        "-s",
        "TConfigTask.t_param=124",
        "-s",
        "TConfigTask.t_param=123",
    ]
    ends_with_124 = [
        "TConfigTask",
        "-s",
        "TConfigTask.t_param=123",
        "-s",
        "TConfigTask.t_param=124",
    ]

    dbnd_run_cmd(ends_with_123)
    run_locally__raises(DatabandRunError, ends_with_124)
def test_output_override_class(self):
    """Override an output's target via ``<output>__target`` in ``--set``.

    After the run, the string form of the task's ``simple_output`` must end
    with ``txt``.
    """

    class TTaskOutputOverride(PythonTask):
        simple_output = output.data

        def run(self):
            self.simple_output = "test"

    target_override = "TTaskOutputOverride.simple_output__target=t1.txt"
    result = dbnd_run_cmd(["TTaskOutputOverride", "--set", target_override])

    output_as_text = str(result.task.simple_output)
    assert output_as_text.endswith("txt")
def test_run_selected_task(self):
    """Run ``simple_pipeline`` restricted by ``run.selected_tasks_regex``.

    With the regex ``log_some_data``, the driver, ``get_some_data`` and
    ``log_some_data`` must end in SUCCESS, while ``calc_and_log`` and the
    pipeline task itself must have no run state.
    """
    pipeline_name = "dbnd_test_scenarios.pipelines.simple_pipeline.simple_pipeline"
    result_run = dbnd_run_cmd(
        [pipeline_name, "--set", "run.selected_tasks_regex=log_some_data"]
    )

    # Index run states by task name for the checks below.
    state_by_task_name = {}
    for task_run in result_run.task_runs:
        state_by_task_name[task_run.task.task_name] = task_run.task_run_state

    for executed_name in ("dbnd_driver", "get_some_data", "log_some_data"):
        assert state_by_task_name[executed_name] == TaskRunState.SUCCESS
    for not_run_name in ("calc_and_log", pipeline_name):
        assert state_by_task_name[not_run_name] is None
def test_from_config_cli_inline(self):
    """A parameter value must be picked up from inline JSON passed to ``-s``."""

    class MyTaskWithConfgInline(TTask):
        parameter_with_config = parameter[str]

        def __init__(self, **kwargs):
            super(MyTaskWithConfgInline, self).__init__(**kwargs)

        def run(self):
            super(MyTaskWithConfgInline, self).run()
            assert self.parameter_with_config == "value_from_inline"

    inline_config = {
        "MyTaskWithConfgInline": {"parameter_with_config": "value_from_inline"}
    }
    json_value = json.dumps(inline_config)
    # The JSON document is single-quoted inside the command string.
    command = "MyTaskWithConfgInline -s '%s'" % json_value
    assert dbnd_run_cmd(command)
def test_serialize(self):
    """Run a pipeline whose child pipeline receives a task class as a parameter."""

    class DepTask(PipelineTask):
        task_param = TaskParameter()
        some_output = output

        def band(self):
            # Instantiate whichever task class was handed in.
            self.some_output = self.task_param()

    class MainTask(PipelineTask):
        some_other_output = output

        def band(self):
            self.some_other_output = DepTask(task_param=TTask)

    # TTask is passed as an argument to DepTask, so it presumably has to be
    # serialized as part of the run.
    cli_args = ["MainTask"]
    assert dbnd_run_cmd(cli_args)
def test_generated_command_line(self):
    """The command line from ``ctrl.task_repr`` must be runnable as-is.

    Also checks that it starts with ``dbnd run`` and mentions neither the
    output (``t_output``) nor the defaulted ``date-param``.
    """
    # NOTE(review): a method with this same name is visible earlier; if both
    # live in one class, this definition replaces the earlier one.
    task_kwargs = dict(
        str_param=15,
        num_param=12,
        list_param=[1, 2, 3],
        none_param=None,
        str_as_target=target(__file__),
    )
    t = TaskInfoParamsTask(**task_kwargs)
    cmd_line_as_str = t.ctrl.task_repr.task_command_line
    cmd_line = shlex.split(cmd_line_as_str)
    logger.info("Command line: %s", cmd_line_as_str)

    assert cmd_line_as_str.startswith("dbnd run")
    # check that outputs are filtered out
    assert "t_output" not in cmd_line_as_str
    # check that defaults are filtered out
    assert "date-param" not in cmd_line_as_str

    # Drop the leading "dbnd run" tokens and execute the remainder.
    args_after_dbnd_run = cmd_line[2:]
    assert dbnd_run_cmd(args_after_dbnd_run)