Example #1
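Verifies that subscribing to a LocalComputeLogManager observable immediately emits one empty update, and that appending to the watched stdout file produces a second, non-empty update once the polling interval elapses.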
import os
import tempfile
import time

# Imports assumed from the surrounding test module (dagster 0.x layout):
from dagster.core.storage.compute_log_manager import ComputeIOType
from dagster.utils import ensure_dir, touch_file


def test_compute_log_manager_subscription_updates():
    from dagster.core.storage.local_compute_log_manager import LocalComputeLogManager

    with tempfile.TemporaryDirectory() as temp_dir:
        compute_log_manager = LocalComputeLogManager(temp_dir, polling_timeout=0.5)
        run_id = "fake_run_id"
        step_key = "spew"
        stdout_path = compute_log_manager.get_local_path(run_id, step_key, ComputeIOType.STDOUT)

        # make sure the parent directory to be watched exists, file exists
        ensure_dir(os.path.dirname(stdout_path))
        touch_file(stdout_path)

        # set up the subscription
        messages = []
        observable = compute_log_manager.observable(run_id, step_key, ComputeIOType.STDOUT)
        observable.subscribe(messages.append)

        # subscribing immediately yields a single update with no data
        assert len(messages) == 1
        last_chunk = messages[-1]
        assert not last_chunk.data
        assert last_chunk.cursor == 0

        with open(stdout_path, "a+") as f:
            # HELLO_SOLID is a message constant defined in the surrounding test module
            print(HELLO_SOLID, file=f)  # pylint:disable=print-call

        # wait longer than the 0.5s polling timeout so the watcher picks up the append
        time.sleep(1)
        assert len(messages) == 2
        last_chunk = messages[-1]
        assert last_chunk.data
        assert last_chunk.cursor > 0
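
Every snippet on this page leans on the same touch_file helper. As a point of reference, here is a minimal sketch of what such a helper does (an illustrative assumption, not dagster's verbatim implementation):

import os


def touch_file(path):
    """Create `path` if it does not exist and bump its mtime, like Unix `touch`."""
    parent = os.path.dirname(path)
    if parent:
        # create any missing parent directories (no-op if they already exist)
        os.makedirs(parent, exist_ok=True)
    # opening in append mode creates the file without truncating existing content
    with open(path, "a"):
        os.utime(path, None)  # set atime/mtime to the current time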
Example #2
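Asserts that loading a workspace from an empty yaml file (created with touch_file) fails with a CheckError describing the unparseable workspace config.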
import os
from tempfile import TemporaryDirectory

import pytest

# Imports assumed from the surrounding test module (paths per dagster 0.x):
from dagster.check import CheckError
from dagster.cli.workspace.load import load_workspace_from_yaml_paths
from dagster.utils import touch_file


def test_bad_workspace_yaml_load():
    with TemporaryDirectory() as temp_dir:
        # an empty yaml file is not a valid workspace config
        touch_file(os.path.join(temp_dir, "foo.yaml"))

        with pytest.raises(
            CheckError,
            match=(
                "Invariant failed. Description: Could not parse a workspace config from the "
                "yaml file at"
            ),
        ):
            with load_workspace_from_yaml_paths([os.path.join(temp_dir, "foo.yaml")]):
                pass
Example #3
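A generator context manager that mirrors a step's stdout and stderr into capture files for the duration of compute, then touches a sentinel file to signal that the capture is complete.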
import os
import sys
from contextlib import contextmanager

from dagster import check

# ensure_dir and touch_file are assumed to come from dagster.utils;
# SystemStepExecutionContext, _filebase, _filepath, mirror_stream,
# should_capture_stdout, and the IO_TYPE_* constants are defined in or
# imported by the surrounding module.
from dagster.utils import ensure_dir, touch_file


@contextmanager  # the bare yields below only work inside a generator context manager
def mirror_step_io(step_context):
    # https://github.com/dagster-io/dagster/issues/1698
    # validate the argument before any early return
    check.inst_param(step_context, "step_context", SystemStepExecutionContext)

    if not should_capture_stdout(step_context.instance):
        yield
        return

    filebase = _filebase(step_context.instance, step_context.run_id, step_context.step.key)
    outpath = _filepath(filebase, IO_TYPE_STDOUT)
    errpath = _filepath(filebase, IO_TYPE_STDERR)
    touchpath = _filepath(filebase, IO_TYPE_COMPLETE)

    ensure_dir(os.path.dirname(outpath))
    ensure_dir(os.path.dirname(errpath))

    with mirror_stream(sys.stderr, errpath):
        with mirror_stream(sys.stdout, outpath):
            yield

    # touch a sentinel file to signify that compute is complete
    touch_file(touchpath)
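
A hypothetical call site for the context manager above (do_compute is an illustrative stand-in, not a dagster API):

with mirror_step_io(step_context):
    do_compute(step_context)  # anything written to stdout/stderr here lands in the capture files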
Example #4
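A compute-log-manager hook that marks a step's log capture as complete by touching the step's completion artifact.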
def on_compute_finish(self, step_context):
    # touch a sentinel file to mark the step's log capture as complete
    touchpath = self.complete_artifact_path(step_context.run_id, step_context.step.key)
    touch_file(touchpath)
Example #5
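A daemon backfill test: while a flag file (created with touch_file) exists, every partition run fails at the conditionally_fail step; a second from-failure backfill, run after the flag file is removed, re-executes only the failed step and its downstream step.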
def test_failure_backfill(external_repo_context):
    output_file = _failure_flag_file()
    with instance_for_context(external_repo_context) as (
        instance,
        grpc_server_registry,
        external_repo,
    ):
        external_partition_set = external_repo.get_external_partition_set(
            "conditionally_fail_partition_set"
        )
        instance.add_backfill(
            PartitionBackfill(
                backfill_id="shouldfail",
                partition_set_origin=external_partition_set.get_external_origin(),
                status=BulkActionStatus.REQUESTED,
                partition_names=["one", "two", "three"],
                from_failure=False,
                reexecution_steps=None,
                tags=None,
                backfill_timestamp=pendulum.now().timestamp(),
            )
        )
        assert instance.get_runs_count() == 0

        try:
            # while the flag file exists, the conditionally_fail step raises
            touch_file(output_file)
            list(
                execute_backfill_iteration(
                    instance, grpc_server_registry, get_default_daemon_logger("BackfillDaemon")
                )
            )
            wait_for_all_runs_to_start(instance)
        finally:
            os.remove(output_file)

        assert instance.get_runs_count() == 3
        runs = instance.get_runs()
        # get_runs returns newest-first, so the run for partition "one" is last
        three, two, one = runs
        assert one.tags[BACKFILL_ID_TAG] == "shouldfail"
        assert one.tags[PARTITION_NAME_TAG] == "one"
        assert one.status == PipelineRunStatus.FAILURE
        assert step_succeeded(instance, one, "always_succeed")
        assert step_failed(instance, one, "conditionally_fail")
        assert step_did_not_run(instance, one, "after_failure")

        assert two.tags[BACKFILL_ID_TAG] == "shouldfail"
        assert two.tags[PARTITION_NAME_TAG] == "two"
        assert two.status == PipelineRunStatus.FAILURE
        assert step_succeeded(instance, two, "always_succeed")
        assert step_failed(instance, two, "conditionally_fail")
        assert step_did_not_run(instance, two, "after_failure")

        assert three.tags[BACKFILL_ID_TAG] == "shouldfail"
        assert three.tags[PARTITION_NAME_TAG] == "three"
        assert three.status == PipelineRunStatus.FAILURE
        assert step_succeeded(instance, three, "always_succeed")
        assert step_failed(instance, three, "conditionally_fail")
        assert step_did_not_run(instance, three, "after_failure")

        instance.add_backfill(
            PartitionBackfill(
                backfill_id="fromfailure",
                partition_set_origin=external_partition_set.get_external_origin(),
                status=BulkActionStatus.REQUESTED,
                partition_names=["one", "two", "three"],
                from_failure=True,
                reexecution_steps=None,
                tags=None,
                backfill_timestamp=pendulum.now().timestamp(),
            )
        )

        assert not os.path.isfile(_failure_flag_file())
        list(
            execute_backfill_iteration(
                instance, grpc_server_registry, get_default_daemon_logger("BackfillDaemon")
            )
        )
        wait_for_all_runs_to_start(instance)

        assert instance.get_runs_count() == 6
        from_failure_filter = PipelineRunsFilter(tags={BACKFILL_ID_TAG: "fromfailure"})
        assert instance.get_runs_count(filters=from_failure_filter) == 3

        runs = instance.get_runs(filters=from_failure_filter)
        three, two, one = runs

        assert one.tags[BACKFILL_ID_TAG] == "fromfailure"
        assert one.tags[PARTITION_NAME_TAG] == "one"
        assert one.status == PipelineRunStatus.SUCCESS
        assert step_did_not_run(instance, one, "always_succeed")
        assert step_succeeded(instance, one, "conditionally_fail")
        assert step_succeeded(instance, one, "after_failure")

        assert two.tags[BACKFILL_ID_TAG] == "fromfailure"
        assert two.tags[PARTITION_NAME_TAG] == "two"
        assert two.status == PipelineRunStatus.SUCCESS
        assert step_did_not_run(instance, two, "always_succeed")
        assert step_succeeded(instance, two, "conditionally_fail")
        assert step_succeeded(instance, two, "after_failure")

        assert three.tags[BACKFILL_ID_TAG] == "fromfailure"
        assert three.tags[PARTITION_NAME_TAG] == "three"
        assert three.status == PipelineRunStatus.SUCCESS
        assert step_did_not_run(instance, three, "always_succeed")
        assert step_succeeded(instance, three, "conditionally_fail")
        assert step_succeeded(instance, three, "after_failure")
Example #6
def on_watch_finish(self, pipeline_run, step_key=None):
    check.inst_param(pipeline_run, "pipeline_run", PipelineRun)
    check.opt_str_param(step_key, "step_key")
    key = self.get_key(pipeline_run, step_key)
    touchpath = self.complete_artifact_path(pipeline_run.run_id, key)
    touch_file(touchpath)
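Unlike Example #4, this hook resolves the completion path from a (pipeline_run, step_key) pair via get_key before touching it.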
Example #7
def on_compute_finish(self, step_context):
    touchpath = self.get_local_path(
        step_context.run_id, step_context.step.key, ComputeIOType.COMPLETE
    )
    touch_file(touchpath)
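A variant of Example #4 in which the completion sentinel path is resolved by get_local_path with ComputeIOType.COMPLETE instead of a dedicated complete_artifact_path helper.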