def test_bad_load():
    with _default_instance() as instance:

        working_directory = os.path.dirname(__file__)

        loadable_target_origin = LoadableTargetOrigin(
            executable_path=sys.executable,
            python_file=__file__,
            attribute="doesnt_exist",
            working_directory=working_directory,
        )

        repo_origin = ExternalRepositoryOrigin(
            ManagedGrpcPythonEnvRepositoryLocationOrigin(
                loadable_target_origin=loadable_target_origin
            ),
            "doesnt_exist",
        )

        schedule_origin = repo_origin.get_job_origin("also_doesnt_exist")

        result = sync_launch_scheduled_execution(schedule_origin)
        assert isinstance(result, ScheduledExecutionFailed)
        assert "doesnt_exist not found at module scope in file" in result.errors[0].to_string()

        ticks = instance.get_job_ticks(schedule_origin.get_id())
        assert ticks[0].status == JobTickStatus.FAILURE
        assert "doesnt_exist not found at module scope in file" in ticks[0].error.message
def test_origin_ids_stable():
    # This test asserts fixed schedule origin IDs to prevent any changes from
    # accidentally shifting these ids that are persisted to ScheduleStorage

    python_origin = ExternalJobOrigin(
        ExternalRepositoryOrigin(
            ManagedGrpcPythonEnvRepositoryLocationOrigin(
                LoadableTargetOrigin(
                    executable_path="/fake/executable",
                    python_file="/fake/file/path",
                    attribute="fake_attribute",
                )
            ),
            "fake_repo",
        ),
        "fake_schedule",
    )
    assert python_origin.get_id() == "eb01cc697463ba614a67567fdeaafcccc60f0fc4"

    grpc_origin = ExternalJobOrigin(
        ExternalRepositoryOrigin(
            GrpcServerRepositoryLocationOrigin(host="fakehost", port=52618), "repo_name"
        ),
        "fake_schedule",
    )

    assert grpc_origin.get_id() == "0961ecddbddfc71104adf036ebe8cd97a94dc77b"
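
# The fixed 40-hex-character ids asserted above look like sha1 digests; to my
# reading of dagster's serdes layer (an assumption), get_id() hashes the
# origin's canonical serialization, roughly like this sketch:
import hashlib

def _snapshot_id_sketch(serialized_origin: str) -> str:
    # Any change to the origin tuple's shape or field values changes the
    # serialization, and therefore the id persisted to ScheduleStorage.
    return hashlib.sha1(serialized_origin.encode("utf-8")).hexdigest()
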
def test_grpc_server_down():
    with _default_instance() as instance:
        down_grpc_repo_origin = ExternalRepositoryOrigin(
            GrpcServerRepositoryLocationOrigin(
                host="localhost",
                port=find_free_port(),
                socket=None,
            ),
            repository_name="down_repo",
        )

        down_grpc_schedule_origin = down_grpc_repo_origin.get_job_origin("down_schedule")

        result = sync_launch_scheduled_execution(down_grpc_schedule_origin, "US/Eastern")

        assert isinstance(result, ScheduledExecutionFailed)
        assert "failed to connect to all addresses" in result.errors[0].to_string()

        ticks = instance.get_job_ticks(down_grpc_schedule_origin.get_id())
        assert ticks[0].status == JobTickStatus.FAILURE
        assert "failed to connect to all addresses" in ticks[0].error.message

@contextmanager  # presumably decorated in the original source, since this helper yields
def python_schedule_origin(schedule_name):
    loadable_target_origin = LoadableTargetOrigin(
        executable_path=sys.executable, python_file=__file__, attribute="the_repo"
    )

    repo_origin = ExternalRepositoryOrigin(
        ManagedGrpcPythonEnvRepositoryLocationOrigin(loadable_target_origin=loadable_target_origin),
        "the_repo",
    )

    yield repo_origin.get_job_origin(schedule_name)

@contextmanager  # presumably decorated in the original source, since this helper yields
def grpc_schedule_origin(schedule_name):
    loadable_target_origin = LoadableTargetOrigin(
        executable_path=sys.executable, python_file=__file__, attribute="the_repo"
    )
    server_process = GrpcServerProcess(loadable_target_origin=loadable_target_origin)
    with server_process.create_ephemeral_client() as api_client:
        repo_origin = ExternalRepositoryOrigin(
            GrpcServerRepositoryLocationOrigin(
                host=api_client.host, port=api_client.port, socket=api_client.socket,
            ),
            repository_name="the_repo",
        )

        yield repo_origin.get_job_origin(schedule_name)
    server_process.wait()
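
# Usage sketch for the two fixture helpers above, assuming they are
# @contextmanager-decorated as noted; the schedule name is hypothetical:
def _fixture_usage_sketch():
    with grpc_schedule_origin("simple_schedule") as schedule_origin:
        result = sync_launch_scheduled_execution(schedule_origin)
        assert not isinstance(result, ScheduledExecutionFailed)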
Example #6
def _get_unloadable_sensor_origin():
    working_directory = os.path.dirname(__file__)
    recon_repo = ReconstructableRepository.for_file(__file__, "doesnt_exist", working_directory)
    return ExternalRepositoryOrigin(
        InProcessRepositoryLocationOrigin(recon_repo), "fake_repository"
    ).get_job_origin("doesnt_exist")
Example #7
def sync_get_streaming_external_repositories_data_grpc(
    api_client: "DagsterGrpcClient", repository_location: "RepositoryLocation"
) -> Mapping[str, ExternalRepositoryData]:
    from dagster.core.host_representation import ExternalRepositoryOrigin, RepositoryLocation

    check.inst_param(repository_location, "repository_location", RepositoryLocation)

    repo_datas = {}
    for repository_name in repository_location.repository_names:  # type: ignore
        external_repository_chunks = list(
            api_client.streaming_external_repository(
                external_repository_origin=ExternalRepositoryOrigin(
                    repository_location.origin,
                    repository_name,
                )
            )
        )

        external_repository_data = deserialize_as(
            "".join([
                chunk["serialized_external_repository_chunk"]
                for chunk in external_repository_chunks
            ]),
            ExternalRepositoryData,
        )

        repo_datas[repository_name] = external_repository_data
    return repo_datas
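
# Illustrative call, with client and location setup elided (a sketch only):
#
#     repo_datas = sync_get_streaming_external_repositories_data_grpc(api_client, repository_location)
#     external_repository_data = repo_datas["the_repo"]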
Example #8
def sync_get_streaming_external_repositories_grpc(api_client, repository_location_handle):
    check.inst_param(
        repository_location_handle, "repository_location_handle", RepositoryLocationHandle
    )

    repos = []
    for repository_name in repository_location_handle.repository_names:
        external_repository_chunks = list(
            api_client.streaming_external_repository(
                external_repository_origin=ExternalRepositoryOrigin(
                    repository_location_handle.origin, repository_name,
                )
            )
        )

        external_repository_data = deserialize_json_to_dagster_namedtuple(
            "".join(
                [
                    chunk["serialized_external_repository_chunk"]
                    for chunk in external_repository_chunks
                ]
            )
        )

        repos.append(
            ExternalRepository(
                external_repository_data,
                RepositoryHandle(
                    repository_name=external_repository_data.name,
                    repository_location_handle=repository_location_handle,
                ),
            )
        )
    return repos
Example #9
def sync_get_external_repositories_grpc(api_client, repository_location_handle):
    check.inst_param(
        repository_location_handle, "repository_location_handle", RepositoryLocationHandle
    )

    repos = []
    for repository_name in repository_location_handle.repository_names:
        external_repository_data = check.inst(
            api_client.external_repository(
                external_repository_origin=ExternalRepositoryOrigin(
                    repository_location_handle.origin, repository_name,
                )
            ),
            ExternalRepositoryData,
        )
        repos.append(
            ExternalRepository(
                external_repository_data,
                RepositoryHandle(
                    repository_name=external_repository_data.name,
                    repository_location_handle=repository_location_handle,
                ),
            )
        )
    return repos
Example #10
def sync_get_streaming_external_repositories_data_grpc(api_client, repository_location):
    from dagster.core.host_representation import (
        RepositoryLocation,
        ExternalRepositoryOrigin,
    )

    check.inst_param(repository_location, "repository_location", RepositoryLocation)

    repo_datas = {}
    for repository_name in repository_location.repository_names:
        external_repository_chunks = list(
            api_client.streaming_external_repository(
                external_repository_origin=ExternalRepositoryOrigin(
                    repository_location.origin,
                    repository_name,
                )
            )
        )

        external_repository_data = deserialize_json_to_dagster_namedtuple(
            "".join(
                [
                    chunk["serialized_external_repository_chunk"]
                    for chunk in external_repository_chunks
                ]
            )
        )

        repo_datas[repository_name] = external_repository_data
    return repo_datas
Example #11
def fake_repo_target():
    return ExternalRepositoryOrigin(
        ManagedGrpcPythonEnvRepositoryLocationOrigin(
            LoadableTargetOrigin(
                executable_path=sys.executable, module_name="fake", attribute="fake"
            )
        ),
        "fake_repo_name",
    )
Example #12
def get_external_repository_origin_from_kwargs(kwargs):
    provided_repo_name = kwargs.get("repository")

    if not provided_repo_name:
        raise click.UsageError("Must provide --repository to load a repository")

    repository_location_origin = get_repository_location_origin_from_kwargs(kwargs)

    return ExternalRepositoryOrigin(repository_location_origin, provided_repo_name)
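
# A hedged sketch of wiring this helper into a click command; the command itself
# is hypothetical, and the location-selection options that
# get_repository_location_origin_from_kwargs reads are elided:
@click.command()
@click.option("--repository", help="Repository name to load")
def show_repo(**kwargs):
    origin = get_external_repository_origin_from_kwargs(kwargs)
    click.echo(origin.repository_name)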
Example #13
def _get_unloadable_sensor_origin(name):
    working_directory = os.path.dirname(__file__)
    loadable_target_origin = LoadableTargetOrigin(
        executable_path=sys.executable,
        python_file=__file__,
        working_directory=working_directory,
    )
    return ExternalRepositoryOrigin(
        InProcessRepositoryLocationOrigin(loadable_target_origin), "fake_repository"
    ).get_instigator_origin(name)
Example #14
def _unloadable_partition_set_origin():
    working_directory = os.path.dirname(__file__)
    return ExternalRepositoryOrigin(
        InProcessRepositoryLocationOrigin(
            LoadableTargetOrigin(
                executable_path=sys.executable,
                python_file=__file__,
                working_directory=working_directory,
            )),
        "fake_repository",
    ).get_partition_set_origin("doesnt_exist")
Example #15
def test_bad_load_sensor_repository(external_repo_context, capfd):
    freeze_datetime = to_timezone(
        create_pendulum_time(year=2019, month=2, day=27, hour=23, minute=59, second=59, tz="UTC"),
        "US/Central",
    )
    with instance_with_sensors(external_repo_context) as (
        instance,
        grpc_server_registry,
        external_repo,
    ):
        with pendulum.test(freeze_datetime):
            external_sensor = external_repo.get_external_sensor("simple_sensor")

            valid_origin = external_sensor.get_external_origin()

            # Swap out a new repository name
            invalid_repo_origin = ExternalJobOrigin(
                ExternalRepositoryOrigin(
                    valid_origin.external_repository_origin.repository_location_origin,
                    "invalid_repo_name",
                ),
                valid_origin.job_name,
            )

            instance.add_job_state(
                JobState(invalid_repo_origin, JobType.SENSOR, JobStatus.RUNNING)
            )

            assert instance.get_runs_count() == 0
            ticks = instance.get_job_ticks(invalid_repo_origin.get_id())
            assert len(ticks) == 0

            evaluate_sensors(instance, grpc_server_registry)

            assert instance.get_runs_count() == 0
            ticks = instance.get_job_ticks(invalid_repo_origin.get_id())
            assert len(ticks) == 0

            captured = capfd.readouterr()
            assert "Sensor daemon caught an error for sensor simple_sensor" in captured.out
            assert (
                "Could not find repository invalid_repo_name in location test_location to run sensor simple_sensor"
                in captured.out)
Example #16
def test_cancel_run():
    with instance_for_test() as instance:

        loadable_target_origin = LoadableTargetOrigin(
            executable_path=sys.executable,
            python_file=__file__,
            working_directory=None,
        )

        server_process = GrpcServerProcess(loadable_target_origin, max_workers=10)

        with server_process.create_ephemeral_client() as api_client:
            streaming_results = []

            pipeline_run = instance.create_run_for_pipeline(
                streaming_pipeline,
                run_config={"solids": {"streamer": {"config": {"length": 20}}}},
            )
            execute_run_args = ExecuteExternalPipelineArgs(
                pipeline_origin=ExternalPipelineOrigin(
                    ExternalRepositoryOrigin(
                        repository_location_origin=GrpcServerRepositoryLocationOrigin(
                            host="localhost",
                            socket=api_client.socket,
                            port=api_client.port,
                        ),
                        repository_name="test_repository",
                    ),
                    pipeline_name="streaming_pipeline",
                ),
                pipeline_run_id=pipeline_run.run_id,
                instance_ref=instance.get_ref(),
            )
            stream_events_result_thread = threading.Thread(
                target=_stream_events_target,
                args=[streaming_results, api_client, execute_run_args],
            )
            stream_events_result_thread.daemon = True
            stream_events_result_thread.start()
            poll_for_step_start(instance, pipeline_run.run_id)

            res = api_client.cancel_execution(
                cancel_execution_request=CancelExecutionRequest(run_id=pipeline_run.run_id)
            )
            assert res.success is True

            poll_for_finished_run(instance, pipeline_run.run_id)

            logs = instance.all_logs(pipeline_run.run_id)
            materializations = [
                ev for ev in logs if ev.dagster_event.event_type_value == "STEP_MATERIALIZATION"
            ]
            assert len(materializations) < 20

            # soft termination
            assert [ev for ev in logs if ev.dagster_event.event_type_value == "STEP_FAILURE"]

        server_process.wait()
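
# `_stream_events_target` is referenced above but not defined; a minimal sketch,
# assuming the grpc client exposes a streaming execute_run call (the exact
# client method name depends on the dagster version):
def _stream_events_target(results, api_client, execute_run_args):
    # Drain the event stream so the main thread can assert on the collected
    # events after cancellation.
    for event in api_client.execute_run(execute_run_args=execute_run_args):
        results.append(event)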