def test_origin_ids_stable():
    """Pin the IDs of two known schedule origins.

    These IDs are persisted to ScheduleStorage, so the test asserts fixed
    values to catch any change that would accidentally shift them.
    """
    # Origin backed by a managed gRPC python environment.
    python_target = LoadableTargetOrigin(
        executable_path="/fake/executable",
        python_file="/fake/file/path",
        attribute="fake_attribute",
    )
    python_location = ManagedGrpcPythonEnvRepositoryLocationOrigin(python_target)
    python_repo = ExternalRepositoryOrigin(python_location, "fake_repo")
    python_origin = ExternalJobOrigin(python_repo, "fake_schedule")
    assert python_origin.get_id() == "eb01cc697463ba614a67567fdeaafcccc60f0fc4"

    # Origin backed by an already-running gRPC server.
    grpc_location = GrpcServerRepositoryLocationOrigin(host="fakehost", port=52618)
    grpc_repo = ExternalRepositoryOrigin(grpc_location, "repo_name")
    grpc_origin = ExternalJobOrigin(grpc_repo, "fake_schedule")
    assert grpc_origin.get_id() == "0961ecddbddfc71104adf036ebe8cd97a94dc77b"
Exemplo n.º 2
0
def get_external_pipeline_from_grpc_server_repository(pipeline_name):
    """Yield the full external pipeline ``pipeline_name`` from a fresh gRPC server.

    A ``GrpcServerProcess`` is started for the ``nope`` attribute of
    ``test_default_run_launcher.py``, the pipeline is looked up in the ``nope``
    repository through an ephemeral client, and the server process is waited on
    once the generator is finished or torn down.

    NOTE(review): this is a generator; presumably it is wrapped by a decorator
    (e.g. a context manager or fixture) that is not visible here -- confirm.
    """
    target_origin = LoadableTargetOrigin(
        executable_path=sys.executable,
        attribute="nope",
        python_file=file_relative_path(__file__, "test_default_run_launcher.py"),
    )
    server_process = GrpcServerProcess(loadable_target_origin=target_origin)

    try:
        with server_process.create_ephemeral_client() as api_client:
            location_origin = GrpcServerRepositoryLocationOrigin(
                location_name="test",
                port=api_client.port,
                socket=api_client.socket,
                host=api_client.host,
            )
            location_handle = RepositoryLocationHandle.create_from_repository_location_origin(
                location_origin
            )
            repository_location = GrpcServerRepositoryLocation(location_handle)

            external_repository = repository_location.get_repository("nope")
            yield external_repository.get_full_external_pipeline(pipeline_name)
    finally:
        # Always wait on the server process, even if the consumer raised.
        server_process.wait()
Exemplo n.º 3
0
def _location_origin_from_grpc_server_config(grpc_server_config, yaml_path):
    """Build a ``GrpcServerRepositoryLocationOrigin`` from a gRPC server config dict.

    Args:
        grpc_server_config: dict read for the keys ``port``, ``socket``,
            ``host``, ``location_name`` and ``ssl``; missing keys yield ``None``.
        yaml_path: string; only type-checked here, not otherwise used.

    Returns:
        The origin built from the config. ``host`` falls back to
        ``"localhost"`` when it is missing or falsy.

    The invariant check requires exactly one of ``socket`` / ``port`` to be truthy.
    """
    check.dict_param(grpc_server_config, "grpc_server_config")
    check.str_param(yaml_path, "yaml_path")

    port = grpc_server_config.get("port")
    socket = grpc_server_config.get("socket")
    host = grpc_server_config.get("host")
    location_name = grpc_server_config.get("location_name")
    use_ssl = grpc_server_config.get("ssl")

    # Exactly one of socket / port must be provided.
    exactly_one_endpoint = (socket or port) and not (socket and port)
    check.invariant(exactly_one_endpoint, "must supply either a socket or a port")

    return GrpcServerRepositoryLocationOrigin(
        port=port,
        socket=socket,
        host=host or "localhost",
        location_name=location_name,
        use_ssl=use_ssl,
    )
def test_grpc_server_down():
    """Launching a schedule whose gRPC server is unreachable records a failure.

    The schedule origin points at ``localhost`` on a port obtained from
    ``find_free_port()``; the launch result and the stored tick are both
    expected to report a connection failure.
    """
    expected_error = "failed to connect to all addresses"

    with _default_instance() as instance:
        unreachable_location = GrpcServerRepositoryLocationOrigin(
            host="localhost",
            port=find_free_port(),
            socket=None,
        )
        down_grpc_repo_origin = ExternalRepositoryOrigin(
            unreachable_location,
            repository_name="down_repo",
        )
        down_grpc_schedule_origin = down_grpc_repo_origin.get_job_origin("down_schedule")

        # NOTE(review): this rebinds the instance yielded by _default_instance();
        # kept as in the original -- confirm whether the rebinding is intentional.
        instance = DagsterInstance.get()
        result = sync_launch_scheduled_execution(down_grpc_schedule_origin, "US/Eastern")

        assert isinstance(result, ScheduledExecutionFailed)
        first_error = result.errors[0].to_string()
        assert expected_error in first_error

        ticks = instance.get_job_ticks(down_grpc_schedule_origin.get_id())
        assert ticks[0].status == JobTickStatus.FAILURE
        assert expected_error in ticks[0].error.message
Exemplo n.º 5
0
def location_origins_from_load_target(load_target):
    """Translate a workspace load target into repository location origins.

    Dispatches on the concrete type of ``load_target``:

    * ``WorkspaceFileTarget``  -> result of ``location_origins_from_yaml_paths``
    * ``PythonFileTarget``     -> one origin from the python file
    * ``ModuleTarget``         -> one origin from the module name
    * ``PackageTarget``        -> one origin from the package name
    * ``GrpcServerTarget``     -> one ``GrpcServerRepositoryLocationOrigin``
    * ``EmptyWorkspaceTarget`` -> an empty list

    Any other type is reported through ``check.not_implemented``.
    """
    if isinstance(load_target, WorkspaceFileTarget):
        return location_origins_from_yaml_paths(load_target.paths)

    if isinstance(load_target, PythonFileTarget):
        python_file_origin = location_origin_from_python_file(
            python_file=load_target.python_file,
            attribute=load_target.attribute,
            working_directory=load_target.working_directory,
        )
        return [python_file_origin]

    if isinstance(load_target, ModuleTarget):
        module_origin = location_origin_from_module_name(
            load_target.module_name, load_target.attribute
        )
        return [module_origin]

    if isinstance(load_target, PackageTarget):
        package_origin = location_origin_from_package_name(
            load_target.package_name, load_target.attribute
        )
        return [package_origin]

    if isinstance(load_target, GrpcServerTarget):
        grpc_origin = GrpcServerRepositoryLocationOrigin(
            port=load_target.port,
            socket=load_target.socket,
            host=load_target.host,
        )
        return [grpc_origin]

    if isinstance(load_target, EmptyWorkspaceTarget):
        return []

    check.not_implemented("Unsupported: {}".format(load_target))
def test_server_down():
    """A run cannot be terminated while its gRPC info tag points at an invalid port.

    The test launches ``sleepy_pipeline`` against an ephemeral gRPC server,
    checks the launcher can terminate it, overwrites the run's
    ``GRPC_INFO_TAG`` with a different port to check termination is refused,
    then restores the original tag and terminates the run.
    """
    with instance_for_test() as instance:
        target_origin = LoadableTargetOrigin(
            executable_path=sys.executable,
            attribute="nope",
            python_file=file_relative_path(__file__, "test_default_run_launcher.py"),
        )
        server_process = GrpcServerProcess(
            loadable_target_origin=target_origin, max_workers=4, force_port=True
        )

        with server_process.create_ephemeral_client() as api_client:
            location_origin = GrpcServerRepositoryLocationOrigin(
                location_name="test",
                port=api_client.port,
                socket=api_client.socket,
                host=api_client.host,
            )
            location_handle = RepositoryLocationHandle.create_from_repository_location_origin(
                location_origin
            )
            repository_location = GrpcServerRepositoryLocation(location_handle)

            external_repository = repository_location.get_repository("nope")
            external_pipeline = external_repository.get_full_external_pipeline("sleepy_pipeline")

            pipeline_run = instance.create_run_for_pipeline(
                pipeline_def=sleepy_pipeline, run_config=None
            )
            run_id = pipeline_run.run_id

            instance.launch_run(run_id, external_pipeline)
            poll_for_step_start(instance, run_id)

            launcher = instance.run_launcher
            assert launcher.can_terminate(run_id)

            # Remember the real tag so it can be restored below.
            original_run_tags = instance.get_run_by_id(run_id).tags[GRPC_INFO_TAG]

            # Replace run tags with an invalid port
            invalid_grpc_info = merge_dicts({"host": "localhost"}, {"port": find_free_port()})
            instance.add_run_tags(run_id, {GRPC_INFO_TAG: seven.json.dumps(invalid_grpc_info)})

            assert not launcher.can_terminate(run_id)

            # Restore the original tag so the run can actually be terminated.
            instance.add_run_tags(run_id, {GRPC_INFO_TAG: original_run_tags})

            assert launcher.terminate(run_id)

        server_process.wait()
Exemplo n.º 7
0
 def create_origins(self):
     """Return a one-element list with the gRPC server origin for this object.

     The origin is built from ``self.port``, ``self.socket``, ``self.host``
     and ``self.location_name``.
     """
     grpc_origin = GrpcServerRepositoryLocationOrigin(
         port=self.port,
         socket=self.socket,
         host=self.host,
         location_name=self.location_name,
     )
     return [grpc_origin]
Exemplo n.º 8
0
def test_run_always_finishes():  # pylint: disable=redefined-outer-name
    """Check that a launched run completes even after the gRPC client is closed.

    The run for ``slow_pipeline`` is launched inside the ephemeral-client
    context. After the context exits, the test asserts the run is unfinished and
    the server process is still alive, then polls until the run succeeds and
    the server process exits.
    """
    with instance_for_test() as instance:
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=slow_pipeline, run_config=None)
        run_id = pipeline_run.run_id

        loadable_target_origin = LoadableTargetOrigin(
            executable_path=sys.executable,
            attribute="nope",
            python_file=file_relative_path(__file__,
                                           "test_default_run_launcher.py"),
        )
        server_process = GrpcServerProcess(
            loadable_target_origin=loadable_target_origin, max_workers=4)
        with server_process.create_ephemeral_client() as api_client:
            repository_location = GrpcServerRepositoryLocation(
                RepositoryLocationHandle.
                create_from_repository_location_origin(
                    GrpcServerRepositoryLocationOrigin(
                        location_name="test",
                        port=api_client.port,
                        socket=api_client.socket,
                        host=api_client.host,
                    )))

            external_pipeline = repository_location.get_repository(
                "nope").get_full_external_pipeline("slow_pipeline")

            # The run exists in storage but has not been launched yet.
            assert instance.get_run_by_id(
                run_id).status == PipelineRunStatus.NOT_STARTED

            instance.launch_run(run_id=pipeline_run.run_id,
                                external_pipeline=external_pipeline)

        # Server process now receives shutdown event, run has not finished yet
        pipeline_run = instance.get_run_by_id(run_id)
        assert not pipeline_run.is_finished
        # poll() returning None means the server subprocess is still running.
        assert server_process.server_process.poll() is None

        # Server should wait until run finishes, then shutdown
        pipeline_run = poll_for_finished_run(instance, run_id)
        assert pipeline_run.status == PipelineRunStatus.SUCCESS

        # Give the server up to 5 seconds to exit once the run is done.
        start_time = time.time()
        while server_process.server_process.poll() is None:
            time.sleep(0.05)
            # Verify server process cleans up eventually
            assert time.time() - start_time < 5

        server_process.wait()
def grpc_schedule_origin(schedule_name):
    """Yield the job origin of ``schedule_name`` served by a fresh gRPC server.

    A ``GrpcServerProcess`` is started for the ``the_repo`` attribute of this
    file, and the yielded origin points at that server through an ephemeral
    client's host/port/socket.

    The server process is waited on in a ``finally`` block so the wait is not
    skipped when an exception propagates through the ``yield`` (e.g. the
    consumer's body raises) or out of the ``with`` block.

    NOTE(review): this is a generator; presumably it is wrapped by a decorator
    (context manager / fixture) that is not visible here -- confirm.
    """
    loadable_target_origin = LoadableTargetOrigin(
        executable_path=sys.executable, python_file=__file__, attribute="the_repo"
    )
    server_process = GrpcServerProcess(loadable_target_origin=loadable_target_origin)
    try:
        with server_process.create_ephemeral_client() as api_client:
            repo_origin = ExternalRepositoryOrigin(
                GrpcServerRepositoryLocationOrigin(
                    host=api_client.host, port=api_client.port, socket=api_client.socket,
                ),
                repository_name="the_repo",
            )

            yield repo_origin.get_job_origin(schedule_name)
    finally:
        # Previously this ran only on the success path, so the server process
        # was never waited on when the consumer failed.
        server_process.wait()
Exemplo n.º 10
0
    def get_external_origin(self):
        """
        Hack! Return a hard-coded origin for the k8s images to use.

        The k8s helm chart workspace uses a gRPC server repo location origin, so
        the normal origin won't work and this one is injected instead. Only
        ``self.name`` is read; it becomes the instigator name.
        """
        location_origin = GrpcServerRepositoryLocationOrigin(
            host="user-code-deployment-1",
            port=3030,
            location_name="user-code-deployment-1",
        )
        repository_origin = ExternalRepositoryOrigin(
            repository_location_origin=location_origin,
            repository_name="demo_execution_repo",
        )
        return ExternalInstigatorOrigin(
            external_repository_origin=repository_origin,
            instigator_name=self.name,
        )
        def _mgr_fn(recon_repo):
            """Yield a ``Workspace`` backed by a gRPC server for ``recon_repo``.

            ``recon_repo`` must be a ``ReconstructableRepository``; its python
            origin's loadable target is served by a new ``GrpcServerProcess``,
            which is waited on when the generator finishes or is torn down.
            """
            check.inst_param(recon_repo, "recon_repo", ReconstructableRepository)

            python_origin = recon_repo.get_python_origin()
            server_process = GrpcServerProcess(
                loadable_target_origin=python_origin.loadable_target_origin
            )
            try:
                with server_process.create_ephemeral_client() as api_client:
                    grpc_origin = GrpcServerRepositoryLocationOrigin(
                        port=api_client.port,
                        socket=api_client.socket,
                        host=api_client.host,
                        location_name="test",
                    )
                    with Workspace([grpc_origin]) as workspace:
                        yield workspace
            finally:
                # Always wait on the server process, even if the consumer raised.
                server_process.wait()
Exemplo n.º 12
0
def test_cancel_run():
    """Cancelling a streaming run through the gRPC client stops it early.

    ``streaming_pipeline`` is configured with ``length`` 20 and executed on a
    background thread; once a step has started the run is cancelled. The test
    then expects fewer than 20 ``STEP_MATERIALIZATION`` events and at least one
    ``STEP_FAILURE`` event in the run's logs.
    """
    with instance_for_test() as instance:
        target_origin = LoadableTargetOrigin(
            executable_path=sys.executable,
            python_file=__file__,
            working_directory=None,
        )
        server_process = GrpcServerProcess(target_origin, max_workers=10)

        with server_process.create_ephemeral_client() as api_client:
            streaming_results = []

            streamer_run_config = {"solids": {"streamer": {"config": {"length": 20}}}}
            pipeline_run = instance.create_run_for_pipeline(
                streaming_pipeline,
                run_config=streamer_run_config,
            )
            run_id = pipeline_run.run_id

            repository_origin = ExternalRepositoryOrigin(
                repository_location_origin=GrpcServerRepositoryLocationOrigin(
                    host="localhost",
                    socket=api_client.socket,
                    port=api_client.port,
                ),
                repository_name="test_repository",
            )
            execute_run_args = ExecuteExternalPipelineArgs(
                pipeline_origin=ExternalPipelineOrigin(
                    repository_origin,
                    pipeline_name="streaming_pipeline",
                ),
                pipeline_run_id=run_id,
                instance_ref=instance.get_ref(),
            )

            # Execute on a daemon thread so the test thread is free to cancel.
            stream_events_result_thread = threading.Thread(
                target=_stream_events_target,
                args=[streaming_results, api_client, execute_run_args],
            )
            stream_events_result_thread.daemon = True
            stream_events_result_thread.start()
            poll_for_step_start(instance, run_id)

            cancel_request = CancelExecutionRequest(run_id=run_id)
            cancel_result = api_client.cancel_execution(cancel_execution_request=cancel_request)
            assert cancel_result.success is True

            poll_for_finished_run(instance, run_id)

            logs = instance.all_logs(run_id)

            def events_of_type(event_type):
                # Filter the run's log entries by their dagster event type.
                return [ev for ev in logs if ev.dagster_event.event_type_value == event_type]

            # The run was cut short, so not all 20 materializations happened.
            assert len(events_of_type("STEP_MATERIALIZATION")) < 20

            # soft termination
            assert events_of_type("STEP_FAILURE")

        server_process.wait()