def test_compute_log_manager_subscription_updates():
    """Verify a stdout subscription delivers an empty chunk first, then a data chunk.

    The subscription is expected to emit one chunk with no data and cursor 0 as
    soon as it is set up, and a second chunk with data and a positive cursor
    after a line has been appended to the watched file.
    """
    from dagster.core.storage.local_compute_log_manager import LocalComputeLogManager

    with tempfile.TemporaryDirectory() as log_root:
        manager = LocalComputeLogManager(log_root, polling_timeout=0.5)
        run_id = "fake_run_id"
        step_key = "spew"
        watched_path = manager.get_local_path(run_id, step_key, ComputeIOType.STDOUT)

        # The watched file and its parent directory must exist before subscribing.
        ensure_dir(os.path.dirname(watched_path))
        touch_file(watched_path)

        # Collect every chunk pushed to the subscriber.
        received = []
        manager.observable(run_id, step_key, ComputeIOType.STDOUT).subscribe(received.append)

        # Subscribing alone yields exactly one update, carrying no data.
        assert len(received) == 1
        initial_chunk = received[-1]
        assert not initial_chunk.data
        assert initial_chunk.cursor == 0

        with open(watched_path, "a+") as stream:
            print(HELLO_SOLID, file=stream)  # pylint:disable=print-call

        # Wait longer than the watchdog timeout (polling_timeout=0.5 above)
        # so the appended line is picked up.
        time.sleep(1)

        assert len(received) == 2
        latest_chunk = received[-1]
        assert latest_chunk.data
        assert latest_chunk.cursor > 0
def test_bad_workspace_yaml_load():
    """Loading a freshly touched ``foo.yaml`` as a workspace must raise ``CheckError``.

    The file is created with ``touch_file`` only (no content is written by this
    test), and the raised error message must match the expected invariant text.
    """
    expected_message = (
        "Invariant failed. Description: Could not parse a workspace config from the "
        "yaml file at"
    )

    with TemporaryDirectory() as workspace_dir:
        yaml_path = os.path.join(workspace_dir, "foo.yaml")
        touch_file(yaml_path)

        with pytest.raises(CheckError, match=expected_message):
            # Entering the context is what triggers the load; the body is unused.
            with load_workspace_from_yaml_paths([yaml_path]):
                pass
def mirror_step_io(step_context):
    """Yield once while stdout/stderr are mirrored to per-step files.

    When ``should_capture_stdout`` is false for the step's instance, this simply
    yields without doing anything else. Otherwise it mirrors ``sys.stderr`` and
    ``sys.stdout`` to files derived from the instance, run id and step key,
    yields, and afterwards touches a "complete" marker file.

    NOTE(review): this is a single-yield generator, presumably wrapped with
    ``contextlib.contextmanager`` outside this view -- confirm.
    """
    # https://github.com/dagster-io/dagster/issues/1698
    if should_capture_stdout(step_context.instance):
        check.inst_param(step_context, 'step_context', SystemStepExecutionContext)

        base = _filebase(step_context.instance, step_context.run_id, step_context.step.key)
        stdout_file = _filepath(base, IO_TYPE_STDOUT)
        stderr_file = _filepath(base, IO_TYPE_STDERR)
        complete_marker = _filepath(base, IO_TYPE_COMPLETE)

        for target in (stdout_file, stderr_file):
            ensure_dir(os.path.dirname(target))

        # stderr is entered first and exited last, stdout is the inner mirror.
        with mirror_stream(sys.stderr, stderr_file), mirror_stream(sys.stdout, stdout_file):
            yield

        # Touch the marker file to signify that compute is complete. This line
        # is only reached when the mirrored section finishes without raising.
        touch_file(complete_marker)
    else:
        # Capture is disabled: hand control straight back to the caller.
        yield
def on_compute_finish(self, step_context):
    """Touch the completion artifact for the step described by ``step_context``.

    The artifact path is resolved via ``self.complete_artifact_path`` from the
    context's run id and step key.
    """
    resolve_path = self.complete_artifact_path
    completion_marker = resolve_path(step_context.run_id, step_context.step.key)
    touch_file(completion_marker)
def test_failure_backfill(external_repo_context):
    """Run a failing backfill, then a ``from_failure`` backfill, and check every run.

    Phase 1 ("shouldfail"): with the failure flag file present, a backfill over
    partitions one/two/three is executed; each resulting run must be FAILURE,
    with ``always_succeed`` succeeded, ``conditionally_fail`` failed and
    ``after_failure`` not run.

    Phase 2 ("fromfailure"): with the flag file removed, a ``from_failure``
    backfill is executed; each resulting run must be SUCCESS, with
    ``always_succeed`` not re-run and the other two steps succeeded.

    The per-run assertions are driven by a loop so that each of the three runs
    is actually checked (previously the step assertions for runs "two" and
    "three" in phase 2 mistakenly inspected run "one").
    """
    output_file = _failure_flag_file()

    with instance_for_context(external_repo_context) as (
        instance,
        grpc_server_registry,
        external_repo,
    ):
        external_partition_set = external_repo.get_external_partition_set(
            "conditionally_fail_partition_set"
        )

        # ---- Phase 1: backfill that is expected to fail ----
        instance.add_backfill(
            PartitionBackfill(
                backfill_id="shouldfail",
                partition_set_origin=external_partition_set.get_external_origin(),
                status=BulkActionStatus.REQUESTED,
                partition_names=["one", "two", "three"],
                from_failure=False,
                reexecution_steps=None,
                tags=None,
                backfill_timestamp=pendulum.now().timestamp(),
            )
        )
        assert instance.get_runs_count() == 0

        try:
            # The flag file exists only for the duration of this iteration and
            # is always removed afterwards, even if the iteration raises.
            touch_file(output_file)
            list(
                execute_backfill_iteration(
                    instance,
                    grpc_server_registry,
                    get_default_daemon_logger("BackfillDaemon"),
                )
            )
            wait_for_all_runs_to_start(instance)
        finally:
            os.remove(output_file)

        assert instance.get_runs_count() == 3
        runs = instance.get_runs()
        # Runs unpack in reverse partition order (presumably newest first).
        three, two, one = runs

        for run, partition_name in ((one, "one"), (two, "two"), (three, "three")):
            assert run.tags[BACKFILL_ID_TAG] == "shouldfail"
            assert run.tags[PARTITION_NAME_TAG] == partition_name
            assert run.status == PipelineRunStatus.FAILURE
            assert step_succeeded(instance, run, "always_succeed")
            assert step_failed(instance, run, "conditionally_fail")
            assert step_did_not_run(instance, run, "after_failure")

        # ---- Phase 2: re-execute from failure ----
        instance.add_backfill(
            PartitionBackfill(
                backfill_id="fromfailure",
                partition_set_origin=external_partition_set.get_external_origin(),
                status=BulkActionStatus.REQUESTED,
                partition_names=["one", "two", "three"],
                from_failure=True,
                reexecution_steps=None,
                tags=None,
                backfill_timestamp=pendulum.now().timestamp(),
            )
        )

        # The flag file must be gone before the second iteration runs.
        assert not os.path.isfile(_failure_flag_file())
        list(
            execute_backfill_iteration(
                instance,
                grpc_server_registry,
                get_default_daemon_logger("BackfillDaemon"),
            )
        )
        wait_for_all_runs_to_start(instance)

        assert instance.get_runs_count() == 6
        from_failure_filter = PipelineRunsFilter(tags={BACKFILL_ID_TAG: "fromfailure"})
        assert instance.get_runs_count(filters=from_failure_filter) == 3

        runs = instance.get_runs(filters=from_failure_filter)
        three, two, one = runs

        for run, partition_name in ((one, "one"), (two, "two"), (three, "three")):
            assert run.tags[BACKFILL_ID_TAG] == "fromfailure"
            assert run.tags[PARTITION_NAME_TAG] == partition_name
            assert run.status == PipelineRunStatus.SUCCESS
            # Each run is checked against its own steps, not run "one".
            assert step_did_not_run(instance, run, "always_succeed")
            assert step_succeeded(instance, run, "conditionally_fail")
            assert step_succeeded(instance, run, "after_failure")
def on_watch_finish(self, pipeline_run, step_key=None):
    """Touch the completion artifact for a watched run (and optional step).

    Args:
        pipeline_run: The run being watched; must be a ``PipelineRun``.
        step_key: Optional step key string, combined with the run via
            ``self.get_key`` to form the artifact key.
    """
    check.inst_param(pipeline_run, "pipeline_run", PipelineRun)
    check.opt_str_param(step_key, "step_key")

    artifact_key = self.get_key(pipeline_run, step_key)
    completion_marker = self.complete_artifact_path(pipeline_run.run_id, artifact_key)
    touch_file(completion_marker)
def on_compute_finish(self, step_context):
    """Touch the ``ComputeIOType.COMPLETE`` file for the step in ``step_context``.

    The file location is resolved through ``self.get_local_path`` using the
    context's run id and step key.
    """
    locate = self.get_local_path
    completion_marker = locate(
        step_context.run_id,
        step_context.step.key,
        ComputeIOType.COMPLETE,
    )
    touch_file(completion_marker)