def watch(self, run_id, key):
    """Register a directory watch for a run/key pair, at most once.

    The incoming ``key`` is combined with ``run_id`` through ``self._key``;
    the resulting value is the index into ``self._watchers``. If an entry
    already exists the call returns immediately without doing anything.

    Otherwise the stdout and stderr local paths and the completion-artifact
    path are requested from ``self._manager``, the directory containing the
    stderr path is passed to ``ensure_dir``, and an event handler is
    scheduled on ``self._observer`` for that directory. Whatever
    ``schedule`` returns is stored in ``self._watchers``.

    NOTE(review): the value returned by ``self._key`` (not the caller's
    original ``key``) is what gets forwarded to the manager's path helpers
    and to the event handler -- confirm this is intended.
    """
    watch_key = self._key(run_id, key)
    if watch_key in self._watchers:
        # Already scheduled for this run/key pair
        return

    manager = self._manager
    stdout_path = manager.get_local_path(run_id, watch_key, ComputeIOType.STDOUT)
    stderr_path = manager.get_local_path(run_id, watch_key, ComputeIOType.STDERR)
    complete_path = manager.complete_artifact_path(run_id, watch_key)

    # The watched directory is the one holding the stderr file
    watch_dir = os.path.dirname(
        manager.get_local_path(run_id, watch_key, ComputeIOType.STDERR)
    )
    ensure_dir(watch_dir)

    self._watchers[watch_key] = self._observer.schedule(
        LocalComputeLogFilesystemEventHandler(
            self, run_id, watch_key, [stdout_path, stderr_path], [complete_path]
        ),
        str(watch_dir),
    )
示例#2
0
def mirror_step_io(step_context):
    """Mirror stdout/stderr into per-step files around a single ``yield``.

    This is a generator that yields exactly once on either path; presumably
    it is wrapped as a context manager outside this view (TODO confirm).

    When ``should_capture_stdout`` is falsy for the context's instance, the
    generator yields without mirroring anything. Otherwise it derives the
    stdout, stderr and completion-marker paths for the step, ensures the
    parent directories of the two stream files, and yields while both
    ``sys.stderr`` and ``sys.stdout`` are mirrored.

    The completion marker is touched after the mirrored section. It is not
    guarded by ``try``/``finally``, so it is skipped when an exception
    propagates out of the mirrored section.
    """
    # https://github.com/dagster-io/dagster/issues/1698
    if not should_capture_stdout(step_context.instance):
        # Capture disabled: hand control back once, untouched
        yield
        return

    check.inst_param(step_context, 'step_context', SystemStepExecutionContext)

    base = _filebase(
        step_context.instance, step_context.run_id, step_context.step.key
    )
    stdout_file = _filepath(base, IO_TYPE_STDOUT)
    stderr_file = _filepath(base, IO_TYPE_STDERR)
    complete_marker = _filepath(base, IO_TYPE_COMPLETE)

    for stream_file in (stdout_file, stderr_file):
        ensure_dir(os.path.dirname(stream_file))

    # stderr is entered first and exited last, stdout is the inner mirror
    with mirror_stream(sys.stderr, stderr_file), mirror_stream(sys.stdout, stdout_file):
        yield

    # touching the marker file signals that compute is complete
    touch_file(complete_marker)
示例#3
0
def test_compute_log_manager_subscription_updates():
    """Check that a stdout subscription emits a new chunk after an append.

    A ``LocalComputeLogManager`` rooted in a temporary directory is created
    with ``polling_timeout=0.5``. After subscribing to the (empty) stdout
    file, one chunk with no data and cursor 0 is expected. Once a line is
    appended and the test has slept for one second, a second chunk carrying
    data and a positive cursor is expected.
    """
    from dagster.core.storage.local_compute_log_manager import LocalComputeLogManager

    with tempfile.TemporaryDirectory() as base_dir:
        manager = LocalComputeLogManager(base_dir, polling_timeout=0.5)
        run_id, step_key = "fake_run_id", "spew"
        log_path = manager.get_local_path(run_id, step_key, ComputeIOType.STDOUT)

        # the directory to be watched and the log file itself must exist up front
        ensure_dir(os.path.dirname(log_path))
        touch_file(log_path)

        # subscribe, collecting every emitted chunk
        received = []
        subscription_source = manager.observable(
            run_id, step_key, ComputeIOType.STDOUT
        )
        subscription_source.subscribe(received.append)

        # subscribing produces exactly one chunk: empty, cursor at 0
        assert len(received) == 1
        initial_chunk = received[-1]
        assert not initial_chunk.data
        assert initial_chunk.cursor == 0

        with open(log_path, "a+") as log_file:
            print(HELLO_SOLID, file=log_file)  # pylint:disable=print-call

        # sleep for longer than the polling timeout so the append is noticed
        time.sleep(1)
        assert len(received) == 2
        latest_chunk = received[-1]
        assert latest_chunk.data
        assert latest_chunk.cursor > 0
示例#4
0
 def _download_to_local(self, run_id, key, io_type):
     """Download the bucket blob for (run_id, key, io_type) to its local path.

     The destination comes from ``self.get_local_path``; its parent
     directory is passed to ``ensure_dir`` and the file is then opened in
     binary write mode. The blob name comes from ``self._bucket_key`` and
     the blob writes itself into the open file via ``download_to_file``.
     """
     local_path = self.get_local_path(run_id, key, io_type)
     parent_dir = os.path.dirname(local_path)
     ensure_dir(parent_dir)
     with open(local_path, "wb") as local_file:
         # The blob is looked up only after the local file has been opened
         bucket = self._bucket
         remote_blob = bucket.blob(self._bucket_key(run_id, key, io_type))
         remote_blob.download_to_file(local_file)