示例#1
0
def test_snapshot_command_pipeline_solid_subset():
    """Snapshot pipeline ``foo`` with a solid subset and read the result back.

    Invokes ``pipeline_snapshot_command`` through ``CliRunner`` with
    ``--solid-subset do_input`` and checks that the output file holds exactly
    one ``ExternalPipelineData`` message, named ``foo``, with a single solid
    definition snapshot.
    """
    with get_temp_file_name() as output_file:
        cli_args = [
            output_file,
            '-y',
            file_relative_path(__file__, 'repository_file.yaml'),
            'foo',
            '--solid-subset',
            'do_input',
        ]
        result = CliRunner().invoke(pipeline_snapshot_command, cli_args)
        assert result.exit_code == 0

        # The snapshot was written; confirm it deserializes into the expected message.
        messages = list(ipc_read_event_stream(output_file))
        assert len(messages) == 1

        external_pipeline_data = messages[0]
        assert isinstance(external_pipeline_data, ExternalPipelineData)
        assert external_pipeline_data.name == 'foo'

        definitions_snapshot = (
            external_pipeline_data.pipeline_snapshot.solid_definitions_snapshot
        )
        assert len(definitions_snapshot.solid_def_snaps) == 1
示例#2
0
def _execute_pipeline_command(
    repository_file, pipeline_name, environment_dict, mode=None, solid_subset=None
):
    """Run ``dagster api execute_pipeline`` in a child process and yield its messages.

    The child writes its event stream to a uniquely named JSON file inside a
    temporary directory; every message read from that file is yielded.

    Args:
        repository_file: Path passed to the CLI after ``-y``.
        pipeline_name: Name of the pipeline to execute.
        environment_dict: Environment configuration; JSON-encoded onto the
            command line.
        mode: Optional pipeline mode. ``--mode`` is only passed when this is
            not ``None`` (previously the literal string ``None`` was sent).
        solid_subset: Optional iterable of solid names, joined with commas.

    Yields:
        Each message produced by ``ipc_read_event_stream`` for the output file.
    """
    # Function-scope import so the block does not rely on a module-level import.
    import subprocess

    with get_temp_dir(in_directory=get_system_temp_directory()) as tmp_dir:
        output_file = os.path.join(tmp_dir, "{}.json".format(uuid4()))

        # An argument list avoids shell parsing entirely, so JSON containing
        # quotes or other shell metacharacters reaches the CLI unmodified.
        command = [
            "dagster",
            "api",
            "execute_pipeline",
            "-y",
            repository_file,
            pipeline_name,
            output_file,
            "--environment-dict={}".format(json.dumps(environment_dict)),
        ]

        if mode is not None:
            command.append("--mode={}".format(mode))

        if solid_subset:
            command.append("--solid_subset={}".format(",".join(solid_subset)))

        # Keep the handle: os.popen's discarded pipe could be closed under the
        # child, and the process was never reaped.
        process = subprocess.Popen(command)

        for message in ipc_read_event_stream(output_file):
            yield message

        # Reap the child before the temporary directory is removed.
        process.wait()
示例#3
0
def test_snapshot_command_pipeline_solid_selection():
    """Snapshot a subset of pipeline ``foo`` chosen via ``--solid-selection``.

    Invokes ``pipeline_subset_snapshot_command`` with a JSON-encoded solid
    selection and checks that the output file holds exactly one
    ``ExternalPipelineSubsetResult`` whose pipeline data is named ``foo`` and
    contains a single solid definition snapshot.
    """
    with get_temp_file_name() as output_file:
        selection_option = '--solid-selection={solid_selection}'.format(
            solid_selection=json.dumps(['do_input'])
        )
        cli_args = [
            output_file,
            '-y',
            file_relative_path(__file__, 'repository_file.yaml'),
            'foo',
            selection_option,
        ]
        result = CliRunner().invoke(pipeline_subset_snapshot_command, cli_args)
        assert result.exit_code == 0

        # The snapshot was written; confirm it deserializes into the expected message.
        messages = list(ipc_read_event_stream(output_file))
        assert len(messages) == 1

        subset_result = messages[0]
        assert isinstance(subset_result, ExternalPipelineSubsetResult)

        pipeline_data = subset_result.external_pipeline_data
        assert pipeline_data.name == 'foo'

        definitions_snapshot = pipeline_data.pipeline_snapshot.solid_definitions_snapshot
        assert len(definitions_snapshot.solid_def_snaps) == 1
示例#4
0
def cli_api_execute_run(instance, pipeline_origin, pipeline_run):
    """Execute a run through the CLI API process and return its events.

    Temporary input and output files are created for the child process. The
    whole event stream is read from the output file; its first entry is
    checked to be an ``ExecuteRunArgsLoadComplete`` and is not returned.

    Returns:
        list: Every event after the initial ``ExecuteRunArgsLoadComplete``.
    """
    with safe_tempfile_path() as output_file, safe_tempfile_path() as input_file:
        # The handle is unused but stays bound while the stream is being read.
        _process = _cli_api_execute_run_process(
            input_file, output_file, instance, pipeline_origin, pipeline_run
        )
        events = list(ipc_read_event_stream(output_file))
        check.inst(events[0], ExecuteRunArgsLoadComplete)
        return events[1:]
示例#5
0
def test_write_empty_stream():
    """A write stream that sends nothing must read back as zero messages."""
    with safe_tempfile_path() as path:
        # Open and close the stream without sending anything.
        with ipc_write_stream(path) as _:
            pass

        received = list(ipc_read_event_stream(path))

        assert received == []
示例#6
0
def test_write_error_stream():
    """An exception raised inside the write stream is read back as an error message.

    The test expects exactly one ``IPCErrorMessage`` whose serialized error
    info mentions the original exception text.
    """
    with safe_tempfile_path() as filename:
        with ipc_write_stream(filename) as _:
            raise Exception("uh oh")

        received = list(ipc_read_event_stream(filename))
        assert len(received) == 1

        error_message = received[0]
        assert isinstance(error_message, IPCErrorMessage)
        assert "uh oh" in error_message.serializable_error_info.message
示例#7
0
def test_execute_run_api(repo_handle):
    """Execute pipeline ``foo`` through the CLI API and check the emitted events.

    A run is created on a temporary local instance, executed via
    ``cli_api_execute_run``, and the output file is read back once the child
    process has finished. Twelve messages are expected; those carrying an
    ``event_type_value`` must match the expected sequence.
    """
    expected_event_types = [
        'PIPELINE_START',
        'ENGINE_EVENT',
        'STEP_START',
        'STEP_OUTPUT',
        'STEP_SUCCESS',
        'STEP_START',
        'STEP_INPUT',
        'STEP_OUTPUT',
        'STEP_SUCCESS',
        'ENGINE_EVENT',
        'PIPELINE_SUCCESS',
    ]

    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance.local_temp(temp_dir)
        pipeline_run = instance.create_run(
            pipeline_name='foo',
            run_id=None,
            run_config={},
            mode='default',
            solids_to_execute=None,
            step_keys_to_execute=None,
            status=None,
            tags=None,
            root_run_id=None,
            parent_run_id=None,
            pipeline_snapshot=None,
            execution_plan_snapshot=None,
            parent_pipeline_snapshot=None,
        )
        with safe_tempfile_path() as output_file_path:
            process = cli_api_execute_run(
                output_file=output_file_path,
                instance=instance,
                pipeline_origin=repo_handle.get_origin(),
                pipeline_run=pipeline_run,
            )

            # Wait for the child process to finish before reading its output.
            _stdout, _stderr = process.communicate()

            events = list(ipc_read_event_stream(output_file_path))

    assert len(events) == 12

    # ExecuteRunArgsLoadComplete is synthetic and has no event_type_value.
    observed_event_types = [
        event.event_type_value
        for event in events
        if hasattr(event, 'event_type_value')
    ]
    assert observed_event_types == expected_event_types
示例#8
0
def test_write_error_stream():
    """An exception raised inside the write stream is read back as an error message.

    Uses a ``NamedTemporaryFile`` path as the stream file and expects exactly
    one ``IPCErrorMessage`` whose serialized error info mentions the original
    exception text.
    """
    with tempfile.NamedTemporaryFile() as f:
        stream_path = f.name

        with ipc_write_stream(stream_path) as _:
            raise Exception('uh oh')

        received = list(ipc_read_event_stream(stream_path))
        assert len(received) == 1

        error_message = received[0]
        assert isinstance(error_message, IPCErrorMessage)
        assert 'uh oh' in error_message.serializable_error_info.message
示例#9
0
def cli_api_launch_run(output_file, instance, pipeline_origin, pipeline_run):
    """Start the CLI API run process and return it once its arguments are loaded.

    Args:
        output_file (str): Path the child process writes its event stream to.
        instance (DagsterInstance): Instance passed on to the child process.
        pipeline_origin (PipelinePythonOrigin): Origin of the pipeline to run.
        pipeline_run (PipelineRun): The run to execute.

    Returns:
        The process object returned by ``_cli_api_execute_run_process``.
    """
    check.str_param(output_file, 'output_file')
    check.inst_param(instance, 'instance', DagsterInstance)
    check.inst_param(pipeline_origin, 'pipeline_origin', PipelinePythonOrigin)
    check.inst_param(pipeline_run, 'pipeline_run', PipelineRun)

    with safe_tempfile_path() as input_file:
        launched_process = _cli_api_execute_run_process(
            input_file, output_file, instance, pipeline_origin, pipeline_run
        )

        # Consume the first event so that the called process has loaded its
        # input before the temporary input file goes away.
        first_event = next(ipc_read_event_stream(output_file))
        check.inst(first_event, ExecuteRunArgsLoadComplete)

        return launched_process
示例#10
0
def cli_api_execute_run(output_file, instance, pipeline_origin, pipeline_run):
    """Launch ``dagster api execute_run`` in a subprocess and return the process.

    The run arguments are written to a temporary input file, an engine event
    announcing the launch is reported on the instance, and the subprocess is
    started. The function returns only after the first event on the output
    stream has been read and checked to be ``ExecuteRunArgsLoadComplete``.

    Args:
        output_file (str): Path the child process writes its event stream to.
        instance (DagsterInstance): Instance whose ref is handed to the child.
        pipeline_origin (PipelinePythonOrigin): Supplies the executable path
            and identifies the pipeline.
        pipeline_run (PipelineRun): The run to execute.

    Returns:
        The process object returned by ``open_ipc_subprocess``.
    """
    check.str_param(output_file, 'output_file')
    check.inst_param(instance, 'instance', DagsterInstance)
    check.inst_param(pipeline_origin, 'pipeline_origin', PipelinePythonOrigin)
    check.inst_param(pipeline_run, 'pipeline_run', PipelineRun)

    from dagster.cli.api import ExecuteRunArgsLoadComplete

    with safe_tempfile_path() as input_file:
        run_args = ExecuteRunArgs(
            pipeline_origin=pipeline_origin,
            pipeline_run_id=pipeline_run.run_id,
            instance_ref=instance.get_ref(),
        )
        write_unary_input(input_file, run_args)

        command = [pipeline_origin.executable_path, '-m', 'dagster', 'api', 'execute_run']
        command.extend([input_file, output_file])

        launch_message = (
            'About to start process for pipeline "{pipeline_name}" (run_id: {run_id}).'
        ).format(pipeline_name=pipeline_run.pipeline_name, run_id=pipeline_run.run_id)
        instance.report_engine_event(
            launch_message,
            pipeline_run,
            engine_event_data=EngineEventData(marker_start='cli_api_subprocess_init'),
        )

        child = open_ipc_subprocess(command)

        # Consume the first event so that the called process has loaded its
        # input before the temporary input file goes away.
        first_event = next(ipc_read_event_stream(output_file))
        check.inst(first_event, ExecuteRunArgsLoadComplete)

        return child
示例#11
0
def test_snapshot_command_repository():
    """Snapshot the repository from ``repository_file.yaml`` and read it back.

    Expects a single ``ExternalRepositoryData`` message named ``bar`` that
    contains two external pipeline entries.
    """
    with get_temp_file_name() as output_file:
        cli_args = [
            output_file,
            '-y',
            file_relative_path(__file__, 'repository_file.yaml'),
        ]
        result = CliRunner().invoke(repository_snapshot_command, cli_args)
        assert result.exit_code == 0

        # The snapshot was written; confirm it deserializes into the expected message.
        messages = list(ipc_read_event_stream(output_file))
        assert len(messages) == 1

        repository_data = messages[0]
        assert isinstance(repository_data, ExternalRepositoryData)
        assert repository_data.name == 'bar'
        assert len(repository_data.external_pipeline_datas) == 2
示例#12
0
def test_write_error_with_custom_message():
    """``send_error`` with a custom message is read back with both pieces intact.

    Expects exactly one ``IPCErrorMessage`` that carries the original
    exception text in its serialized error info and ``"custom"`` as its
    message.
    """
    with safe_tempfile_path() as filename:
        with ipc_write_stream(filename) as stream:
            try:
                raise Exception("uh oh")
            except Exception:
                # Report the active exception explicitly with a custom message.
                stream.send_error(sys.exc_info(), message="custom")

        received = list(ipc_read_event_stream(filename))
        assert len(received) == 1

        ipc_message = received[0]
        assert isinstance(ipc_message, IPCErrorMessage)
        assert "uh oh" in ipc_message.serializable_error_info.message
        assert ipc_message.message == "custom"
示例#13
0
def test_write_error_with_custom_message():
    """``send_error`` with a custom message is read back with both pieces intact.

    Uses a ``NamedTemporaryFile`` path as the stream file and expects exactly
    one ``IPCErrorMessage`` that carries the original exception text in its
    serialized error info and ``'custom'`` as its message.
    """
    with tempfile.NamedTemporaryFile() as f:
        stream_path = f.name

        with ipc_write_stream(stream_path) as stream:
            try:
                raise Exception('uh oh')
            except Exception:
                # Report the active exception explicitly with a custom message.
                stream.send_error(sys.exc_info(), message='custom')

        received = list(ipc_read_event_stream(stream_path))
        assert len(received) == 1

        ipc_message = received[0]
        assert isinstance(ipc_message, IPCErrorMessage)
        assert 'uh oh' in ipc_message.serializable_error_info.message
        assert ipc_message.message == 'custom'
示例#14
0
def run_detached_container_command(image, command, volumes, output_file):
    """Run ``command`` in a docker container and yield the messages it wrote.

    Args:
        image: Image passed to ``client.containers.run``.
        command: Command passed to ``client.containers.run``.
        volumes: Volume mapping passed to ``client.containers.run``.
        output_file: Path of the event-stream file read after the container
            run call returns.

    Yields:
        Each message produced by ``ipc_read_event_stream`` for ``output_file``.

    Raises:
        ImportError: If the ``docker`` package cannot be imported. A warning
            explaining how to install it is emitted before re-raising.
    """
    # Only the import is guarded, so an ImportError raised later (while running
    # the container or reading the stream) is not misreported as a missing
    # docker package.
    try:
        from docker.client import from_env
    except ImportError:
        warnings.warn(
            "Cannot load docker environment without the python package docker. Ensure that dagster[docker] or the python package docker is installed."
        )
        raise

    client = from_env()

    # This is currently blocking. Has to be updated to run in detached mode. The problem
    # is knowing when the file is ready to be read from.
    client.containers.run(image, command=command, volumes=volumes, auto_remove=False)

    for message in ipc_read_event_stream(output_file):
        yield message
示例#15
0
def test_write_read_stream():
    """Messages sent on a write stream are read back equal and in order."""

    @whitelist_for_serdes
    class TestMessage(namedtuple("_TestMessage", "message")):
        def __new__(cls, message):
            return super(TestMessage, cls).__new__(cls, message)

    with safe_tempfile_path() as path:
        hello = TestMessage(message="hello")
        world = TestMessage(message="world")

        with ipc_write_stream(path) as stream:
            for outgoing in (hello, world):
                stream.send(outgoing)

        received = list(ipc_read_event_stream(path))

        assert received[0] == hello
        assert received[1] == world
示例#16
0
def sync_get_external_repository(location_handle):
    """Build an ``ExternalRepository`` from a ``dagster api snapshot repository`` call.

    The CLI is invoked synchronously with the location's pointer arguments and
    writes one ``ExternalRepositoryData`` message to a temporary file, which
    is read back and wrapped.

    Args:
        location_handle (LocationHandle): Location whose pointer supplies the
            CLI arguments.

    Returns:
        ExternalRepository: Built from the snapshot data and a
        ``RepositoryHandle`` named after it.
    """
    check.inst_param(location_handle, 'location_handle', LocationHandle)

    with get_temp_file_name() as output_file:
        command = ['dagster', 'api', 'snapshot', 'repository', output_file]
        command += xplat_shlex_split(location_handle.pointer.get_cli_args())

        exit_status = subprocess.check_call(command)
        check.invariant(
            exit_status == 0, 'dagster api cli invocation did not complete successfully'
        )

        received = list(ipc_read_event_stream(output_file))
        check.invariant(len(received) == 1)

        repository_data = received[0]
        check.inst(repository_data, ExternalRepositoryData)

        handle = RepositoryHandle(repository_data.name, location_handle)
        return ExternalRepository(repository_data, handle)
示例#17
0
def api_execute_pipeline(instance, recon_repo, pipeline_name, environment_dict,
                         mode, solid_subset):
    """Run ``dagster api execute_pipeline`` in a child process and yield its messages.

    The child writes its event stream to a uniquely named JSON file inside a
    temporary directory; every message read from that file is yielded.

    Args:
        instance (DagsterInstance): Instance whose serialized ref is passed
            via ``--instance-ref``.
        recon_repo (ReconstructableRepository): Must have a ``yaml_path``;
            only yaml-based repositories are supported.
        pipeline_name (str): Name of the pipeline to execute.
        environment_dict (dict): Environment configuration; JSON-encoded onto
            the command line.
        mode (str): Pipeline mode passed via ``--mode``.
        solid_subset (Optional[List[str]]): Solid names, joined with commas.

    Yields:
        Each message produced by ``ipc_read_event_stream`` for the output file.
    """
    # Function-scope import so the block does not rely on a module-level import.
    import subprocess

    check.inst_param(instance, 'instance', DagsterInstance)
    check.inst_param(recon_repo, 'recon_repo', ReconstructableRepository)
    check.str_param(pipeline_name, 'pipeline_name')
    check.dict_param(environment_dict, 'environment_dict')
    check.str_param(mode, 'mode')
    check.opt_list_param(solid_subset, 'solid_subset', of_type=str)

    check.param_invariant(recon_repo.yaml_path, 'recon_repo',
                          'Only support yaml-based repositories for now')

    with get_temp_dir(in_directory=get_system_temp_directory()) as tmp_dir:
        output_file = os.path.join(tmp_dir, "{}.json".format(uuid.uuid4()))

        # An argument list avoids shell parsing entirely, so the JSON payloads
        # reach the CLI unmodified even when they contain quotes.
        command = [
            "dagster",
            "api",
            "execute_pipeline",
            "-y",
            recon_repo.yaml_path,
            pipeline_name,
            output_file,
            "--environment-dict={}".format(json.dumps(environment_dict)),
            "--mode={}".format(mode),
            "--instance-ref={}".format(
                serialize_dagster_namedtuple(instance.get_ref())),
        ]

        if solid_subset:
            command.append("--solid_subset={}".format(",".join(solid_subset)))

        # Keep the handle: os.popen's discarded pipe could be closed under the
        # child, and the process was never reaped.
        process = subprocess.Popen(command)

        for message in ipc_read_event_stream(output_file):
            yield message

        # Reap the child before the temporary directory is removed.
        process.wait()
def sync_cli_api_execute_run(instance, pipeline_origin, pipeline_name,
                             environment_dict, mode, solids_to_execute):
    """Create a run, execute it through the CLI API, and yield its messages.

    The run is created on ``instance``, executed via ``cli_api_execute_run``
    with a temporary output file, and the output is read only after the
    child process has finished.

    Yields:
        Each message produced by ``ipc_read_event_stream`` for the output file.
    """
    with safe_tempfile_path() as output_file_path:
        run = instance.create_run(
            pipeline_name=pipeline_name,
            run_id=None,
            environment_dict=environment_dict,
            mode=mode,
            solids_to_execute=solids_to_execute,
            step_keys_to_execute=None,
            status=None,
            tags=None,
            root_run_id=None,
            parent_run_id=None,
            pipeline_snapshot=None,
            execution_plan_snapshot=None,
            parent_pipeline_snapshot=None,
        )
        child = cli_api_execute_run(output_file_path, instance, pipeline_origin, run)

        # Wait for the child process to finish before reading its output.
        _out, _err = child.communicate()

        for event in ipc_read_event_stream(output_file_path):
            yield event