Example #1
def _mgr_fn(recon_repo):
    """Goes out of process but same process as host process"""
    check.inst_param(recon_repo, "recon_repo", ReconstructableRepository)

    empty_repo = ReconstructableRepository.from_legacy_repository_yaml(
        file_relative_path(__file__, "empty_repo.yaml")
    )

    with Workspace(
        [
            RepositoryLocationHandle.create_python_env_location(
                loadable_target_origin=LoadableTargetOrigin(
                    executable_path=sys.executable,
                    python_file=file_relative_path(__file__, "setup.py"),
                    attribute="test_repo",
                ),
                location_name="test",
            ),
            RepositoryLocationHandle.create_in_process_location(empty_repo.pointer),
            RepositoryLocationHandle.create_python_env_location(
                loadable_target_origin=LoadableTargetOrigin(
                    executable_path=sys.executable,
                    python_file=file_relative_path(__file__, "setup.py"),
                    attribute="empty_repo",
                ),
                location_name="empty_repo",
            ),
        ]
    ) as workspace:
        yield workspace
Example #2
def _mgr_fn(recon_repo):
    """Goes out of process but same process as host process"""
    check.inst_param(recon_repo, "recon_repo", ReconstructableRepository)

    with Workspace(
        [
            ManagedGrpcPythonEnvRepositoryLocationOrigin(
                loadable_target_origin=LoadableTargetOrigin(
                    executable_path=sys.executable,
                    python_file=file_relative_path(__file__, "setup.py"),
                    attribute="test_repo",
                ),
                location_name="test",
            ),
            ManagedGrpcPythonEnvRepositoryLocationOrigin(
                loadable_target_origin=LoadableTargetOrigin(
                    executable_path=sys.executable,
                    python_file=file_relative_path(__file__, "setup.py"),
                    attribute="empty_repo",
                ),
                location_name="empty_repo",
            ),
        ]
    ) as workspace:
        yield workspace
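Both variants of _mgr_fn above are generator functions: they build a Workspace from a list of repository location descriptions (RepositoryLocationHandles in one API, ManagedGrpcPythonEnvRepositoryLocationOrigins in the other) and yield it. A minimal sketch of how such a manager function might be consumed in a test, assuming it is wrapped with contextlib.contextmanager; run_with_workspace is a hypothetical name, not part of the examples:

from contextlib import contextmanager

# Wrap the generator so it can be used as a `with` block in tests.
workspace_mgr = contextmanager(_mgr_fn)

def run_with_workspace(recon_repo):
    with workspace_mgr(recon_repo) as workspace:
        ...  # exercise the repository locations in the workspace here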
Example #3
def test_raise_on_error(kubeconfig_file):
    # Construct a K8s run launcher in a fake k8s environment.
    mock_k8s_client_batch_api = mock.MagicMock()
    k8s_run_launcher = K8sRunLauncher(
        service_account_name="dagit-admin",
        instance_config_map="dagster-instance",
        postgres_password_secret="dagster-postgresql-secret",
        dagster_home="/opt/dagster/dagster_home",
        job_image="fake_job_image",
        load_incluster_config=False,
        kubeconfig_file=kubeconfig_file,
        k8s_client_batch_api=mock_k8s_client_batch_api,
        fail_pod_on_run_failure=True,
    )
    # Create fake external pipeline.
    recon_pipeline = reconstructable(fake_pipeline)
    recon_repo = recon_pipeline.repository
    repo_def = recon_repo.get_definition()
    loadable_target_origin = LoadableTargetOrigin(python_file=__file__)

    with instance_for_test() as instance:
        with in_process_test_workspace(instance, loadable_target_origin) as workspace:
            location = workspace.get_repository_location(workspace.repository_location_names[0])
            repo_handle = RepositoryHandle(
                repository_name=repo_def.name,
                repository_location=location,
            )
            fake_external_pipeline = external_pipeline_from_recon_pipeline(
                recon_pipeline,
                solid_selection=None,
                repository_handle=repo_handle,
            )

            # Launch the run in a fake Dagster instance.
            pipeline_name = "demo_pipeline"
            run = create_run_for_test(
                instance,
                pipeline_name=pipeline_name,
                external_pipeline_origin=fake_external_pipeline.get_external_origin(),
                pipeline_code_origin=fake_external_pipeline.get_python_origin(),
            )
            k8s_run_launcher.register_instance(instance)
            k8s_run_launcher.launch_run(LaunchRunContext(run, workspace))

        mock_method_calls = mock_k8s_client_batch_api.method_calls
        assert len(mock_method_calls) > 0
        method_name, _args, kwargs = mock_method_calls[0]
        assert method_name == "create_namespaced_job"

        container = kwargs["body"].spec.template.spec.containers[0]
        args = container.args
        assert (
            args
            == ExecuteRunArgs(
                pipeline_origin=run.pipeline_code_origin,
                pipeline_run_id=run.run_id,
                instance_ref=instance.get_ref(),
                set_exit_code_on_failure=True,
            ).get_command_args()
        )
Example #4
def test_process_server_registry():
    origin = ManagedGrpcPythonEnvRepositoryLocationOrigin(
        loadable_target_origin=LoadableTargetOrigin(
            executable_path=sys.executable,
            attribute="repo",
            python_file=file_relative_path(__file__,
                                           "test_grpc_server_registry.py"),
        ), )

    with ProcessGrpcServerRegistry(reload_interval=5,
                                   heartbeat_ttl=10,
                                   startup_timeout=5) as registry:
        endpoint_one = registry.get_grpc_endpoint(origin)
        endpoint_two = registry.get_grpc_endpoint(origin)

        assert endpoint_two == endpoint_one

        assert _can_connect(origin, endpoint_one)
        assert _can_connect(origin, endpoint_two)

        start_time = time.time()
        while True:

            # Registry should return a new server endpoint after 5 seconds
            endpoint_three = registry.get_grpc_endpoint(origin)

            if endpoint_three.server_id != endpoint_one.server_id:
                break

            if time.time() - start_time > 15:
                raise Exception("Server ID never changed")

            time.sleep(1)

        assert _can_connect(origin, endpoint_three)

        start_time = time.time()
        while True:
            # Server at endpoint_one should eventually die due to heartbeat failure

            if not _can_connect(origin, endpoint_one):
                break

            if time.time() - start_time > 30:
                raise Exception(
                    "Old Server never died after process manager released it")

            time.sleep(1)

        # Make one more fresh process, then leave the context so that it will be cleaned up
        while True:
            endpoint_four = registry.get_grpc_endpoint(origin)

            if endpoint_four.server_id != endpoint_three.server_id:
                assert _can_connect(origin, endpoint_four)
                break

    registry.wait_for_processes()
    assert not _can_connect(origin, endpoint_three)
    assert not _can_connect(origin, endpoint_four)
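The two polling loops above share a pattern: retry a predicate until it holds or a deadline passes, then fail loudly. A small helper along these lines could factor out the repetition; wait_until is a hypothetical name, not part of the registry API:

import time

def wait_until(predicate, timeout, interval=1, message="Condition never became true"):
    # Poll `predicate` until it returns True or `timeout` seconds elapse.
    start_time = time.time()
    while not predicate():
        if time.time() - start_time > timeout:
            raise Exception(message)
        time.sleep(interval)

# For example, the second loop above could become:
# wait_until(lambda: not _can_connect(origin, endpoint_one), timeout=30,
#            message="Old Server never died after process manager released it")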
Example #5
File: api.py Project: keypointt/dagster
def grpc_command(
    port=None,
    socket=None,
    host="localhost",
    max_workers=None,
    heartbeat=False,
    heartbeat_timeout=30,
    lazy_load_user_code=False,
    ipc_output_file=None,
    fixed_server_id=None,
    override_system_timezone=None,
    **kwargs,
):
    if seven.IS_WINDOWS and port is None:
        raise click.UsageError(
            "You must pass a valid --port/-p on Windows: --socket/-s not supported."
        )
    if (port is None) == (socket is None):
        # Exactly one of port or socket must be provided.
        raise click.UsageError("You must pass one and only one of --port/-p or --socket/-s.")

    loadable_target_origin = None
    if any(
        kwargs[key]
        for key in [
            "attribute",
            "working_directory",
            "module_name",
            "package_name",
            "python_file",
            "empty_working_directory",
        ]
    ):
        loadable_target_origin = LoadableTargetOrigin(
            executable_path=sys.executable,
            attribute=kwargs["attribute"],
            working_directory=get_working_directory_from_kwargs(kwargs),
            module_name=kwargs["module_name"],
            python_file=kwargs["python_file"],
            package_name=kwargs["package_name"],
        )

    with (
        mock_system_timezone(override_system_timezone)
        if override_system_timezone
        else nullcontext()
    ):
        server = DagsterGrpcServer(
            port=port,
            socket=socket,
            host=host,
            loadable_target_origin=loadable_target_origin,
            max_workers=max_workers,
            heartbeat=heartbeat,
            heartbeat_timeout=heartbeat_timeout,
            lazy_load_user_code=lazy_load_user_code,
            ipc_output_file=ipc_output_file,
            fixed_server_id=fixed_server_id,
        )

        server.serve()
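This function backs the gRPC server CLI command, so it is normally reached through command-line flags rather than called directly. Still, a direct call (valid for the undecorated function as shown) is a compact way to see which kwargs it expects. The sketch below assumes a local repo.py defining my_repo (hypothetical names); note that serve() blocks until the server shuts down:

# A sketch, not the documented entry point: start a server on a port against
# a Python file target. All target kwargs must be present because the
# function indexes into `kwargs` for each of them.
grpc_command(
    port=8999,
    host="localhost",
    python_file="repo.py",  # hypothetical file
    attribute="my_repo",  # hypothetical repository attribute
    module_name=None,
    package_name=None,
    working_directory=None,
    empty_working_directory=False,
)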
Example #6
def test_heartbeat():
    loadable_target_origin = LoadableTargetOrigin(
        executable_path=sys.executable,
        attribute="bar_repo",
        python_file=file_relative_path(__file__, "grpc_repo.py"),
    )
    server = GrpcServerProcess(
        loadable_target_origin=loadable_target_origin,
        max_workers=2,
        heartbeat=True,
        heartbeat_timeout=1,
    )
    with server.create_ephemeral_client() as client:
        assert server.server_process.poll() is None

        # heartbeat keeps the server alive
        time.sleep(0.5)
        client.heartbeat()
        time.sleep(0.5)
        client.heartbeat()
        time.sleep(0.5)
        assert server.server_process.poll() is None

        start_time = time.time()
        while (time.time() - start_time) < 10:
            if server.server_process.poll() is not None:
                return
            time.sleep(0.1)

        raise Exception(
            "Timed out waiting for server to terminate after heartbeat stopped"
        )
Example #7
def test_heartbeat():
    loadable_target_origin = LoadableTargetOrigin(
        attribute="bar_repo",
        python_file=file_relative_path(__file__, "grpc_repo.py"),
    )
    server = GrpcServerProcess(
        loadable_target_origin=loadable_target_origin,
        max_workers=2,
        heartbeat=True,
        heartbeat_timeout=1,
    )
    with server.create_ephemeral_client() as client:
        assert server.server_process.poll() is None

        # heartbeat keeps the server alive
        time.sleep(0.5)
        client.heartbeat()
        time.sleep(0.5)
        client.heartbeat()
        time.sleep(0.5)
        assert server.server_process.poll() is None

        # without the heartbeat, the server dies
        time.sleep(2)
        assert server.server_process.poll() is not None
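Both heartbeat tests rely on the client pinging more often than heartbeat_timeout. In a longer-lived test that pinging can move to a background thread; a sketch under the same assumptions (heartbeat_timeout=1, ping every 0.5 seconds), with hypothetical helper names:

import threading
import time

def _heartbeat_loop(client, stop_event, interval=0.5):
    # Ping more often than heartbeat_timeout so the server never expires.
    while not stop_event.is_set():
        client.heartbeat()
        time.sleep(interval)

def run_with_keepalive(server):
    stop_event = threading.Event()
    with server.create_ephemeral_client() as client:
        thread = threading.Thread(target=_heartbeat_loop, args=(client, stop_event))
        thread.daemon = True
        thread.start()
        try:
            ...  # talk to the server; the background heartbeat keeps it alive
        finally:
            stop_event.set()
            thread.join()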
Example #8
def test_error_repo_in_registry():
    error_origin = ManagedGrpcPythonEnvRepositoryLocationOrigin(
        loadable_target_origin=LoadableTargetOrigin(
            executable_path=sys.executable,
            attribute="error_repo",
            python_file=file_relative_path(__file__, "error_repo.py"),
        ),
    )
    with ProcessGrpcServerRegistry(reload_interval=5, heartbeat_ttl=10) as registry:
        # Repository with a loading error does not raise an exception
        endpoint = registry.get_grpc_endpoint(error_origin)

        # But using that endpoint to load a location results in an error
        with pytest.raises(DagsterUserCodeProcessError, match="object is not callable"):
            with GrpcServerRepositoryLocation(
                origin=error_origin,
                server_id=endpoint.server_id,
                port=endpoint.port,
                socket=endpoint.socket,
                host=endpoint.host,
                watch_server=False,
            ):
                pass

        # that error is idempotent
        with pytest.raises(DagsterUserCodeProcessError, match="object is not callable"):
            with GrpcServerRepositoryLocation(
                origin=error_origin,
                server_id=endpoint.server_id,
                port=endpoint.port,
                socket=endpoint.socket,
                host=endpoint.host,
                watch_server=False,
            ):
                pass
Example #9
def test_list_command_grpc_socket():
    runner = CliRunner()

    with GrpcServerProcess(
        loadable_target_origin=LoadableTargetOrigin(
            python_file=file_relative_path(__file__, "test_cli_commands.py"), attribute="bar"
        ),
    ).create_ephemeral_client() as api_client:
        execute_list_command(
            {"grpc_socket": api_client.socket}, no_print, DagsterInstance.local_temp(),
        )
        execute_list_command(
            {"grpc_socket": api_client.socket, "grpc_host": api_client.host},
            no_print,
            DagsterInstance.local_temp(),
        )

        result = runner.invoke(pipeline_list_command, ["--grpc-socket", api_client.socket])
        assert_correct_bar_repository_output(result)

        result = runner.invoke(
            pipeline_list_command,
            ["--grpc-socket", api_client.socket, "--grpc-host", api_client.host],
        )
        assert_correct_bar_repository_output(result)
Example #10
File: load.py Project: G9999/dagster
def location_handle_from_python_file(
    python_file,
    attribute,
    working_directory,
    user_process_api,
    location_name=None,
    executable_path=sys.executable,
):
    check.str_param(python_file, "python_file")
    check.opt_str_param(attribute, "attribute")
    check.opt_str_param(working_directory, "working_directory")
    check.inst_param(user_process_api, "user_process_api", UserProcessApi)
    check.opt_str_param(location_name, "location_name")

    loadable_target_origin = LoadableTargetOrigin(
        executable_path=executable_path,
        python_file=python_file,
        module_name=None,
        working_directory=working_directory,
        attribute=attribute,
    )

    return RepositoryLocationHandle.create_python_env_location(
        loadable_target_origin=loadable_target_origin,
        location_name=location_name,
        user_process_api=user_process_api,
    )
Example #11
def test_server_down():
    with instance_for_test() as instance:
        loadable_target_origin = LoadableTargetOrigin(
            executable_path=sys.executable,
            attribute="nope",
            python_file=file_relative_path(__file__, "test_default_run_launcher.py"),
        )

        server_process = GrpcServerProcess(
            loadable_target_origin=loadable_target_origin, max_workers=4, force_port=True
        )

        with server_process.create_ephemeral_client() as api_client:
            repository_location = GrpcServerRepositoryLocation(
                RepositoryLocationHandle.create_from_repository_location_origin(
                    GrpcServerRepositoryLocationOrigin(
                        location_name="test",
                        port=api_client.port,
                        socket=api_client.socket,
                        host=api_client.host,
                    )
                )
            )

            external_pipeline = repository_location.get_repository(
                "nope"
            ).get_full_external_pipeline("sleepy_pipeline")

            pipeline_run = instance.create_run_for_pipeline(
                pipeline_def=sleepy_pipeline, run_config=None
            )

            instance.launch_run(pipeline_run.run_id, external_pipeline)

            poll_for_step_start(instance, pipeline_run.run_id)

            launcher = instance.run_launcher
            assert launcher.can_terminate(pipeline_run.run_id)

            original_run_tags = instance.get_run_by_id(pipeline_run.run_id).tags[GRPC_INFO_TAG]

            # Replace run tags with an invalid port
            instance.add_run_tags(
                pipeline_run.run_id,
                {
                    GRPC_INFO_TAG: seven.json.dumps(
                        merge_dicts({"host": "localhost"}, {"port": find_free_port()})
                    )
                },
            )

            assert not launcher.can_terminate(pipeline_run.run_id)

            instance.add_run_tags(pipeline_run.run_id, {GRPC_INFO_TAG: original_run_tags})

            assert launcher.terminate(pipeline_run.run_id)

        server_process.wait()
Example #12
def loadable_target_origin():
    return LoadableTargetOrigin(
        executable_path=sys.executable,
        module_name="dagster_tests.scheduler_tests.test_scheduler_run",
        working_directory=os.getcwd(),
        attribute="the_repo",
    )
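This fixture shows the minimal module_name form. Across these examples, LoadableTargetOrigin is given exactly one of three targets - python_file, module_name, or package_name - optionally narrowed by attribute and working_directory (compare the kwargs handled in the grpc_command example above). A sketch of the three forms, assuming LoadableTargetOrigin is imported as in the examples and using hypothetical file/module names:

import sys

# Target a file on disk:
file_origin = LoadableTargetOrigin(
    executable_path=sys.executable,
    python_file="repo.py",
    attribute="my_repo",
)

# Target an importable module:
module_origin = LoadableTargetOrigin(
    executable_path=sys.executable,
    module_name="my_project.repo",
    attribute="my_repo",
)

# Target an installed package:
package_origin = LoadableTargetOrigin(
    executable_path=sys.executable,
    package_name="my_project",
    attribute="my_repo",
)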
Example #13
def get_loadable_target_origin(self, executable_path):
    return LoadableTargetOrigin(
        executable_path=executable_path,
        python_file=self.python_file,
        attribute=self.fn_name,
        working_directory=self.working_directory,
    )
Example #14
def get_deployed_grpc_server_workspace(instance):
    loadable_target_origin = LoadableTargetOrigin(
        executable_path=sys.executable,
        attribute="nope",
        python_file=file_relative_path(__file__,
                                       "test_default_run_launcher.py"),
    )
    server_process = GrpcServerProcess(
        loadable_target_origin=loadable_target_origin)

    try:
        with server_process.create_ephemeral_client():  # shuts down when this context exits
            with WorkspaceProcessContext(
                    instance,
                    GrpcServerTarget(
                        host="localhost",
                        socket=server_process.socket,
                        port=server_process.port,
                        location_name="test",
                    ),
            ) as workspace_process_context:
                yield workspace_process_context.create_request_context()
    finally:
        server_process.wait()
Example #15
def get_external_pipeline_from_grpc_server_repository(pipeline_name):
    loadable_target_origin = LoadableTargetOrigin(
        executable_path=sys.executable,
        attribute="nope",
        python_file=file_relative_path(__file__,
                                       "test_default_run_launcher.py"),
    )
    server_process = GrpcServerProcess(
        loadable_target_origin=loadable_target_origin)

    try:
        with server_process.create_ephemeral_client() as api_client:
            repository_location = GrpcServerRepositoryLocation(
                RepositoryLocationHandle.create_from_repository_location_origin(
                    GrpcServerRepositoryLocationOrigin(
                        location_name="test",
                        port=api_client.port,
                        socket=api_client.socket,
                        host=api_client.host,
                    )
                )
            )

            yield repository_location.get_repository("nope").get_full_external_pipeline(
                pipeline_name
            )
    finally:
        server_process.wait()
Example #16
def test_bad_load():
    with _default_instance() as instance:
        instance = DagsterInstance.get()

        working_directory = os.path.dirname(__file__)

        loadable_target_origin = LoadableTargetOrigin(
            executable_path=sys.executable,
            python_file=__file__,
            attribute="doesnt_exist",
            working_directory=working_directory,
        )

        repo_origin = ExternalRepositoryOrigin(
            ManagedGrpcPythonEnvRepositoryLocationOrigin(
                loadable_target_origin=loadable_target_origin
            ),
            "doesnt_exist",
        )

        schedule_origin = repo_origin.get_job_origin("also_doesnt_exist")

        result = sync_launch_scheduled_execution(schedule_origin)
        assert isinstance(result, ScheduledExecutionFailed)
        assert "doesnt_exist not found at module scope in file" in result.errors[0].to_string()

        ticks = instance.get_job_ticks(schedule_origin.get_id())
        assert ticks[0].status == JobTickStatus.FAILURE
        assert "doesnt_exist not found at module scope in file" in ticks[0].error.message
Example #17
def test_origin_ids_stable():
    # This test asserts fixed schedule origin IDs to prevent any changes from
    # accidentally shifting these ids that are persisted to ScheduleStorage

    python_origin = ExternalJobOrigin(
        ExternalRepositoryOrigin(
            ManagedGrpcPythonEnvRepositoryLocationOrigin(
                LoadableTargetOrigin(
                    executable_path="/fake/executable",
                    python_file="/fake/file/path",
                    attribute="fake_attribute",
                )
            ),
            "fake_repo",
        ),
        "fake_schedule",
    )
    assert python_origin.get_id() == "eb01cc697463ba614a67567fdeaafcccc60f0fc4"

    grpc_origin = ExternalJobOrigin(
        ExternalRepositoryOrigin(
            GrpcServerRepositoryLocationOrigin(host="fakehost", port=52618), "repo_name"
        ),
        "fake_schedule",
    )

    assert grpc_origin.get_id() == "0961ecddbddfc71104adf036ebe8cd97a94dc77b"
Example #18
def test_registry_multithreading():
    origin = ManagedGrpcPythonEnvRepositoryLocationOrigin(
        loadable_target_origin=LoadableTargetOrigin(
            executable_path=sys.executable,
            attribute="repo",
            python_file=file_relative_path(__file__,
                                           "test_grpc_server_registry.py"),
        ), )

    with ProcessGrpcServerRegistry(reload_interval=300,
                                   heartbeat_ttl=600) as registry:
        endpoint = registry.get_grpc_endpoint(origin)

        threads = []
        success_events = []
        for _index in range(5):
            event = threading.Event()
            thread = threading.Thread(target=_registry_thread,
                                      args=(origin, registry, endpoint, event))
            threads.append(thread)
            success_events.append(event)
            thread.start()

        for thread in threads:
            thread.join()

        for event in success_events:
            assert event.is_set()

        assert _can_connect(origin, endpoint)

    registry.wait_for_processes()
    assert not _can_connect(origin, endpoint)
Example #19
def test_list_command_grpc_socket():
    with instance_for_test() as instance:
        runner = CliRunner()

        server_process = GrpcServerProcess(
            loadable_target_origin=LoadableTargetOrigin(
                executable_path=sys.executable,
                python_file=file_relative_path(__file__, "test_cli_commands.py"),
                attribute="bar",
            ),
        )

        with server_process.create_ephemeral_client() as api_client:
            execute_list_command(
                {"grpc_socket": api_client.socket}, no_print, instance,
            )
            execute_list_command(
                {"grpc_socket": api_client.socket, "grpc_host": api_client.host},
                no_print,
                instance,
            )

            result = runner.invoke(pipeline_list_command, ["--grpc-socket", api_client.socket])
            assert_correct_bar_repository_output(result)

            result = runner.invoke(
                pipeline_list_command,
                ["--grpc-socket", api_client.socket, "--grpc-host", api_client.host],
            )
            assert_correct_bar_repository_output(result)

        server_process.wait()
Example #20
def sync_list_repositories_ephemeral_grpc(
    executable_path,
    python_file,
    module_name,
    working_directory,
    attribute,
    package_name,
):
    from dagster.grpc.client import ephemeral_grpc_api_client

    check.str_param(executable_path, "executable_path")
    check.opt_str_param(python_file, "python_file")
    check.opt_str_param(module_name, "module_name")
    check.opt_str_param(working_directory, "working_directory")
    check.opt_str_param(package_name, "package_name")

    with ephemeral_grpc_api_client(loadable_target_origin=LoadableTargetOrigin(
            executable_path=executable_path,
            module_name=module_name,
            python_file=python_file,
            working_directory=working_directory,
            attribute=attribute,
            package_name=package_name,
    )) as api_client:
        return sync_list_repositories_grpc(api_client)
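A hypothetical call to the helper above, listing the repositories defined in a local file; the path is a placeholder, and every other target argument is passed explicitly as None since the signature declares no defaults:

import sys

repositories = sync_list_repositories_ephemeral_grpc(
    executable_path=sys.executable,
    python_file="repo.py",  # hypothetical file
    module_name=None,
    working_directory=None,
    attribute=None,
    package_name=None,
)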
Example #21
def test_run_always_finishes():  # pylint: disable=redefined-outer-name
    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance.local_temp(
            temp_dir,
            overrides={
                "run_launcher": {
                    "module": "dagster.core.launcher.grpc_run_launcher",
                    "class": "GrpcRunLauncher",
                }
            },
        )

        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=slow_pipeline, run_config=None)
        run_id = pipeline_run.run_id

        loadable_target_origin = LoadableTargetOrigin(
            attribute="nope",
            python_file=file_relative_path(__file__,
                                           "test_default_run_launcher.py"),
        )
        server_process = GrpcServerProcess(
            loadable_target_origin=loadable_target_origin, max_workers=4)
        with server_process.create_ephemeral_client() as api_client:
            repository_location = GrpcServerRepositoryLocation(
                RepositoryLocationHandle.create_grpc_server_location(
                    location_name="test",
                    port=api_client.port,
                    socket=api_client.socket,
                    host=api_client.host,
                ))

            external_pipeline = repository_location.get_repository(
                "nope").get_full_external_pipeline("slow_pipeline")

            assert instance.get_run_by_id(
                run_id).status == PipelineRunStatus.NOT_STARTED

            launcher = instance.run_launcher
            launcher.launch_run(instance=instance,
                                run=pipeline_run,
                                external_pipeline=external_pipeline)

        # Server process now receives shutdown event, run has not finished yet
        pipeline_run = instance.get_run_by_id(run_id)
        assert not pipeline_run.is_finished
        assert server_process.server_process.poll() is None

        # Server should wait until run finishes, then shutdown
        pipeline_run = poll_for_finished_run(instance, run_id)
        assert pipeline_run.status == PipelineRunStatus.SUCCESS

        start_time = time.time()
        while server_process.server_process.poll() is None:
            time.sleep(0.05)
            # Verify server process cleans up eventually
            assert time.time() - start_time < 5

        server_process.wait()
Example #22
def test_list_command_deployed_grpc():
    runner = CliRunner()

    with instance_for_test() as instance:
        server_process = GrpcServerProcess(
            loadable_target_origin=LoadableTargetOrigin(
                executable_path=sys.executable,
                python_file=file_relative_path(__file__,
                                               "test_cli_commands.py"),
                attribute="bar",
            ),
            force_port=True,
        )

        with server_process.create_ephemeral_client() as api_client:
            result = runner.invoke(pipeline_list_command,
                                   ["--grpc-port", api_client.port])
            assert_correct_bar_repository_output(result)

            result = runner.invoke(
                pipeline_list_command,
                [
                    "--grpc-port", api_client.port, "--grpc-host",
                    api_client.host
                ],
            )
            assert_correct_bar_repository_output(result)

            result = runner.invoke(pipeline_list_command,
                                   ["--grpc-port", api_client.port])
            assert_correct_bar_repository_output(result)

            result = runner.invoke(
                pipeline_list_command,
                [
                    "--grpc-port", api_client.port, "--grpc-socket",
                    "foonamedsocket"
                ],
            )
            assert result.exit_code != 0

            execute_list_command(
                {"grpc_port": api_client.port},
                no_print,
                instance,
            )

            # Can't supply both port and socket
            with pytest.raises(UsageError):
                execute_list_command(
                    {
                        "grpc_port": api_client.port,
                        "grpc_socket": "foonamedsocket"
                    },
                    no_print,
                    instance,
                )

        server_process.wait()
Example #23
def fake_repo_target():
    return ExternalRepositoryOrigin(
        ManagedGrpcPythonEnvRepositoryLocationOrigin(
            LoadableTargetOrigin(
                executable_path=sys.executable, module_name="fake", attribute="fake"
            ),
        ),
        "fake_repo_name",
    )
Example #24
def test_cancel_run():
    with instance_for_test() as instance:

        loadable_target_origin = LoadableTargetOrigin(
            executable_path=sys.executable, python_file=__file__, working_directory=None,
        )

        server_process = GrpcServerProcess(loadable_target_origin, max_workers=10)

        with server_process.create_ephemeral_client() as api_client:
            streaming_results = []

            pipeline_run = instance.create_run_for_pipeline(
                streaming_pipeline, run_config={"solids": {"streamer": {"config": {"length": 20}}}},
            )
            execute_run_args = ExecuteRunArgs(
                pipeline_origin=PipelineGrpcServerOrigin(
                    pipeline_name="streaming_pipeline",
                    repository_origin=RepositoryGrpcServerOrigin(
                        host="localhost",
                        socket=api_client.socket,
                        port=api_client.port,
                        repository_name="test_repository",
                    ),
                ),
                pipeline_run_id=pipeline_run.run_id,
                instance_ref=instance.get_ref(),
            )
            stream_events_result_thread = threading.Thread(
                target=_stream_events_target, args=[streaming_results, api_client, execute_run_args]
            )
            stream_events_result_thread.daemon = True
            stream_events_result_thread.start()
            poll_for_step_start(instance, pipeline_run.run_id)

            res = api_client.cancel_execution(
                cancel_execution_request=CancelExecutionRequest(run_id=pipeline_run.run_id)
            )
            assert res.success is True

            poll_for_run(instance, pipeline_run.run_id)

            logs = instance.all_logs(pipeline_run.run_id)
            assert (
                len(
                    [
                        ev
                        for ev in logs
                        if ev.dagster_event.event_type_value == "STEP_MATERIALIZATION"
                    ]
                )
                < 20
            )

            # soft termination
            assert [ev for ev in logs if ev.dagster_event.event_type_value == "STEP_FAILURE"]

        server_process.wait()
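The helper _stream_events_target referenced above is not shown in this example. A plausible sketch, assuming the gRPC client of this Dagster version exposes execute_run as a streaming call for these arguments:

def _stream_events_target(results, api_client, execute_run_args):
    # Append each streamed event so the main thread can inspect them later.
    for event in api_client.execute_run(execute_run_args):
        results.append(event)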
Example #25
File: utils.py Project: sd2k/dagster
def get_bar_repo_repository_location_handle():
    return RepositoryLocationHandle.create_python_env_location(
        loadable_target_origin=LoadableTargetOrigin(
            executable_path=sys.executable,
            python_file=file_relative_path(__file__, "api_tests_repo.py"),
            attribute="bar_repo",
        ),
        location_name="bar_repo_location",
    )
Example #26
def get_example_repository_location_handle():
    loadable_target_origin = LoadableTargetOrigin(
        executable_path=sys.executable, python_file=file_relative_path(__file__, "repo.py"),
    )
    location_name = "example_repo_location"

    origin = ManagedGrpcPythonEnvRepositoryLocationOrigin(loadable_target_origin, location_name)

    return RepositoryLocationHandle.create_from_repository_location_origin(origin)
Example #27
def test_custom_loadable_target_origin():
    # Verifies that you can swap out the LoadableTargetOrigin for the same
    # repository location origin
    first_loadable_target_origin = LoadableTargetOrigin(
        executable_path=sys.executable,
        attribute="repo",
        python_file=file_relative_path(__file__,
                                       "test_grpc_server_registry.py"),
    )

    second_loadable_target_origin = LoadableTargetOrigin(
        executable_path=sys.executable,
        attribute="other_repo",
        python_file=file_relative_path(__file__,
                                       "test_grpc_server_registry.py"),
    )

    origin = RegisteredRepositoryLocationOrigin("test_location")

    with TestMockProcessGrpcServerRegistry() as registry:
        with DynamicWorkspace(registry) as workspace:
            registry.mocked_loadable_target_origin = first_loadable_target_origin

            endpoint_one = registry.get_grpc_endpoint(origin)
            assert registry.get_grpc_endpoint(
                origin).server_id == endpoint_one.server_id

            location_one = workspace.get_location(origin)
            assert location_one.has_repository("repo")

            # Swap in a new LoadableTargetOrigin - the same origin now returns a different
            # endpoint and repository
            registry.mocked_loadable_target_origin = second_loadable_target_origin

            endpoint_two = registry.get_grpc_endpoint(origin)

            assert endpoint_two.server_id != endpoint_one.server_id
            location_two = workspace.get_location(origin)

            assert location_two.has_repository("other_repo")

    registry.wait_for_processes()
    assert not _can_connect(origin, endpoint_one)
    assert not _can_connect(origin, endpoint_two)
Example #28
def test_check_run_health(kubeconfig_file):

    labels = {"foo_label_key": "bar_label_value"}

    # Construct a K8s run launcher in a fake k8s environment.
    mock_k8s_client_batch_api = mock.Mock(spec_set=["read_namespaced_job"])
    mock_k8s_client_batch_api.read_namespaced_job.side_effect = [
        V1Job(status=V1JobStatus(failed=0, succeeded=0)),
        V1Job(status=V1JobStatus(failed=0, succeeded=1)),
        V1Job(status=V1JobStatus(failed=1, succeeded=0)),
    ]
    k8s_run_launcher = K8sRunLauncher(
        service_account_name="dagit-admin",
        instance_config_map="dagster-instance",
        postgres_password_secret="dagster-postgresql-secret",
        dagster_home="/opt/dagster/dagster_home",
        job_image="fake_job_image",
        load_incluster_config=False,
        kubeconfig_file=kubeconfig_file,
        k8s_client_batch_api=mock_k8s_client_batch_api,
        labels=labels,
    )

    # Create fake external pipeline.
    recon_pipeline = reconstructable(fake_pipeline)
    recon_repo = recon_pipeline.repository
    repo_def = recon_repo.get_definition()
    loadable_target_origin = LoadableTargetOrigin(python_file=__file__)

    with instance_for_test() as instance:
        with in_process_test_workspace(instance, loadable_target_origin) as workspace:
            location = workspace.get_repository_location(workspace.repository_location_names[0])
            repo_handle = RepositoryHandle(
                repository_name=repo_def.name,
                repository_location=location,
            )
            fake_external_pipeline = external_pipeline_from_recon_pipeline(
                recon_pipeline,
                solid_selection=None,
                repository_handle=repo_handle,
            )

            # Launch the run in a fake Dagster instance.
            pipeline_name = "demo_pipeline"
            run = create_run_for_test(
                instance,
                pipeline_name=pipeline_name,
                external_pipeline_origin=fake_external_pipeline.get_external_origin(),
                pipeline_code_origin=fake_external_pipeline.get_python_origin(),
            )
            k8s_run_launcher.register_instance(instance)

            # same order as side effects
            assert k8s_run_launcher.check_run_worker_health(run).status == WorkerStatus.RUNNING
            assert k8s_run_launcher.check_run_worker_health(run).status == WorkerStatus.SUCCESS
            assert k8s_run_launcher.check_run_worker_health(run).status == WorkerStatus.FAILED
Example #29
def test_run_always_finishes():  # pylint: disable=redefined-outer-name
    with instance_for_test() as instance:
        loadable_target_origin = LoadableTargetOrigin(
            executable_path=sys.executable,
            attribute="nope",
            python_file=file_relative_path(__file__, "test_default_run_launcher.py"),
        )
        server_process = GrpcServerProcess(
            loadable_target_origin=loadable_target_origin, max_workers=4
        )
        with server_process.create_ephemeral_client():  # Shuts down when leaving the context
            with WorkspaceProcessContext(
                instance,
                GrpcServerTarget(
                    host="localhost",
                    socket=server_process.socket,
                    port=server_process.port,
                    location_name="test",
                ),
            ) as workspace_process_context:
                workspace = workspace_process_context.create_request_context()

                external_pipeline = (
                    workspace.get_repository_location("test")
                    .get_repository("nope")
                    .get_full_external_pipeline("slow_pipeline")
                )

                pipeline_run = instance.create_run_for_pipeline(
                    pipeline_def=slow_pipeline,
                    run_config=None,
                    external_pipeline_origin=external_pipeline.get_external_origin(),
                    pipeline_code_origin=external_pipeline.get_python_origin(),
                )
                run_id = pipeline_run.run_id

                assert instance.get_run_by_id(run_id).status == PipelineRunStatus.NOT_STARTED

                instance.launch_run(run_id=run_id, workspace=workspace)

        # Server process now receives shutdown event, run has not finished yet
        pipeline_run = instance.get_run_by_id(run_id)
        assert not pipeline_run.is_finished
        assert server_process.server_process.poll() is None

        # Server should wait until run finishes, then shutdown
        pipeline_run = poll_for_finished_run(instance, run_id)
        assert pipeline_run.status == PipelineRunStatus.SUCCESS

        start_time = time.time()
        while server_process.server_process.poll() is None:
            time.sleep(0.05)
            # Verify server process cleans up eventually
            assert time.time() - start_time < 5

        server_process.wait()
Example #30
def test_dagster_out_of_process_location():
    with ManagedGrpcPythonEnvRepositoryLocationOrigin(
            location_name="test_location",
            loadable_target_origin=LoadableTargetOrigin(
                executable_path=sys.executable,
                python_file=file_relative_path(__file__, "setup.py"),
                attribute="test_repo",
            ),
    ).create_test_location() as env:
        assert env.get_repository("test_repo")