Exemplo n.º 1
0
def test_queued_pipeline_origin_check():

    code_pointer = ModuleCodePointer("fake", "fake")
    fake_pipeline_origin = ExternalPipelineOrigin(
        ExternalRepositoryOrigin(
            InProcessRepositoryLocationOrigin(
                ReconstructableRepository(code_pointer)),
            "foo_repo",
        ),
        "foo",
    )

    fake_code_origin = PipelinePythonOrigin(
        pipeline_name="foo",
        repository_origin=RepositoryPythonOrigin(
            sys.executable,
            code_pointer,
        ),
    )

    PipelineRun(
        status=PipelineRunStatus.QUEUED,
        external_pipeline_origin=fake_pipeline_origin,
        pipeline_code_origin=fake_code_origin,
    )

    with pytest.raises(check.CheckError):
        PipelineRun(status=PipelineRunStatus.QUEUED)

    with pytest.raises(check.CheckError):
        PipelineRun().with_status(PipelineRunStatus.QUEUED)
Exemplo n.º 2
0
    def get_external_origin(self):
        """
        Hack! Inject origin that the k8s images will use. The BK image uses a different directory
        structure (/workdir/python_modules/dagster-test/dagster_test/test_project) than the images
        inside the kind cluster (/dagster_test/test_project). As a result the normal origin won't
        work, we need to inject this one.
        """

        return ExternalJobOrigin(
            external_repository_origin=ExternalRepositoryOrigin(
                repository_location_origin=InProcessRepositoryLocationOrigin(
                    recon_repo=ReconstructableRepository(
                        pointer=FileCodePointer(
                            python_file=
                            "/dagster_test/test_project/test_pipelines/repo.py",
                            fn_name="define_demo_execution_repo",
                        ),
                        container_image=self._container_image,
                        executable_path="python",
                        entry_point=DEFAULT_DAGSTER_ENTRY_POINT,
                    )),
                repository_name="demo_execution_repo",
            ),
            job_name=self.name,
        )
Exemplo n.º 3
0
def test_sensor_timeout():
    port = find_free_port()
    python_file = file_relative_path(__file__, "grpc_repo.py")

    subprocess_args = [
        "dagster",
        "api",
        "grpc",
        "--port",
        str(port),
        "--python-file",
        python_file,
    ]

    process = subprocess.Popen(
        subprocess_args,
        stdout=subprocess.PIPE,
    )

    try:
        wait_for_grpc_server(
            process, DagsterGrpcClient(port=port, host="localhost"), subprocess_args
        )
        client = DagsterGrpcClient(port=port)

        with instance_for_test() as instance:
            repo_origin = ExternalRepositoryOrigin(
                repository_location_origin=GrpcServerRepositoryLocationOrigin(
                    port=port, host="localhost"
                ),
                repository_name="bar_repo",
            )
            with pytest.raises(DagsterUserCodeUnreachableError) as exc_info:
                client.external_sensor_execution(
                    sensor_execution_args=SensorExecutionArgs(
                        repository_origin=repo_origin,
                        instance_ref=instance.get_ref(),
                        sensor_name="slow_sensor",
                        last_completion_time=None,
                        last_run_key=None,
                        cursor=None,
                    ),
                    timeout=2,
                )

            assert "Deadline Exceeded" in str(exc_info.getrepr())

            # Call succeeds without the timeout
            client.external_sensor_execution(
                sensor_execution_args=SensorExecutionArgs(
                    repository_origin=repo_origin,
                    instance_ref=instance.get_ref(),
                    sensor_name="slow_sensor",
                    last_completion_time=None,
                    last_run_key=None,
                    cursor=None,
                ),
            )
    finally:
        process.terminate()
Exemplo n.º 4
0
def test_sensor_timeout():
    port = find_free_port()
    python_file = file_relative_path(__file__, "grpc_repo.py")

    ipc_output_file = _get_ipc_output_file()
    process = subprocess.Popen(
        [
            "dagster",
            "api",
            "grpc",
            "--port",
            str(port),
            "--python-file",
            python_file,
            "--ipc-output-file",
            ipc_output_file,
        ],
        stdout=subprocess.PIPE,
    )

    try:
        wait_for_grpc_server(process, ipc_output_file)
        client = DagsterGrpcClient(port=port)

        with instance_for_test() as instance:
            repo_origin = ExternalRepositoryOrigin(
                repository_location_origin=GrpcServerRepositoryLocationOrigin(
                    port=port, host="localhost"
                ),
                repository_name="bar_repo",
            )
            with pytest.raises(Exception, match="Deadline Exceeded"):
                client.external_sensor_execution(
                    sensor_execution_args=SensorExecutionArgs(
                        repository_origin=repo_origin,
                        instance_ref=instance.get_ref(),
                        sensor_name="slow_sensor",
                        last_completion_time=None,
                        last_run_key=None,
                    ),
                    timeout=2,
                )

            # Call succeeds without the timeout
            client.external_sensor_execution(
                sensor_execution_args=SensorExecutionArgs(
                    repository_origin=repo_origin,
                    instance_ref=instance.get_ref(),
                    sensor_name="slow_sensor",
                    last_completion_time=None,
                    last_run_key=None,
                ),
            )
    finally:
        process.terminate()
Exemplo n.º 5
0
def get_repository_origin_from_kwargs(kwargs):
    provided_repo_name = kwargs.get("repository")

    if not provided_repo_name:
        raise click.UsageError(
            "Must provide --repository to load a repository")

    repository_location_origin = get_repository_location_origin_from_kwargs(
        kwargs)

    return ExternalRepositoryOrigin(repository_location_origin,
                                    provided_repo_name)
Exemplo n.º 6
0
def create_invalid_run(instance, **kwargs):
    create_run_for_test(
        instance,
        external_pipeline_origin=ExternalPipelineOrigin(
            ExternalRepositoryOrigin(
                InProcessRepositoryLocationOrigin(
                    ReconstructableRepository(ModuleCodePointer(
                        "fake", "fake"))),
                "foo",
            ),
            "wrong-pipeline",
        ),
        pipeline_name="wrong-pipeline",
        **kwargs,
    )
Exemplo n.º 7
0
def test_queued_pipeline_origin_check():
    fake_pipeline_origin = ExternalPipelineOrigin(
        ExternalRepositoryOrigin(
            InProcessRepositoryLocationOrigin(
                ReconstructableRepository(ModuleCodePointer("fake", "fake"))),
            "foo_repo",
        ),
        "foo",
    )

    PipelineRun(status=PipelineRunStatus.QUEUED,
                external_pipeline_origin=fake_pipeline_origin)

    with pytest.raises(check.CheckError):
        PipelineRun(status=PipelineRunStatus.QUEUED)

    with pytest.raises(check.CheckError):
        PipelineRun().with_status(PipelineRunStatus.QUEUED)
Exemplo n.º 8
0
    def get_external_origin(self):
        """
        Hack! Inject origin that the k8s images will use. The k8s helm chart workspace uses a
        gRPC server repo location origin. As a result the normal origin won't work, we need to
        inject this one.
        """

        return ExternalInstigatorOrigin(
            external_repository_origin=ExternalRepositoryOrigin(
                repository_location_origin=GrpcServerRepositoryLocationOrigin(
                    host="user-code-deployment-1",
                    port=3030,
                    location_name="user-code-deployment-1",
                ),
                repository_name="demo_execution_repo",
            ),
            instigator_name=self.name,
        )
Exemplo n.º 9
0
 def get_external_origin(self):
     return ExternalRepositoryOrigin(
         self.repository_location_handle.origin,
         self.repository_name,
     )
Exemplo n.º 10
0
def test_external_job_origin_instigator_origin():
    def build_legacy_whitelist_map():
        legacy_env = WhitelistMap.create()

        @_whitelist_for_serdes(legacy_env)
        class ExternalJobOrigin(
                namedtuple("_ExternalJobOrigin",
                           "external_repository_origin job_name")):
            def get_id(self):
                return create_snapshot_id(self)

        @_whitelist_for_serdes(legacy_env)
        class ExternalRepositoryOrigin(
                namedtuple("_ExternalRepositoryOrigin",
                           "repository_location_origin repository_name")):
            def get_id(self):
                return create_snapshot_id(self)

        class GrpcServerOriginSerializer(DefaultNamedTupleSerializer):
            @classmethod
            def skip_when_empty(cls):
                return {"use_ssl"}

        @_whitelist_for_serdes(whitelist_map=legacy_env,
                               serializer=GrpcServerOriginSerializer)
        class GrpcServerRepositoryLocationOrigin(
                namedtuple(
                    "_GrpcServerRepositoryLocationOrigin",
                    "host port socket location_name use_ssl",
                ), ):
            def __new__(cls,
                        host,
                        port=None,
                        socket=None,
                        location_name=None,
                        use_ssl=None):
                return super(GrpcServerRepositoryLocationOrigin,
                             cls).__new__(cls, host, port, socket,
                                          location_name, use_ssl)

        return (
            legacy_env,
            ExternalJobOrigin,
            ExternalRepositoryOrigin,
            GrpcServerRepositoryLocationOrigin,
        )

    legacy_env, klass, repo_klass, location_klass = build_legacy_whitelist_map(
    )

    from dagster.core.host_representation.origin import (
        ExternalInstigatorOrigin,
        ExternalRepositoryOrigin,
        GrpcServerRepositoryLocationOrigin,
    )

    # serialize from current code, compare against old code
    instigator_origin = ExternalInstigatorOrigin(
        external_repository_origin=ExternalRepositoryOrigin(
            repository_location_origin=GrpcServerRepositoryLocationOrigin(
                host="localhost", port=1234, location_name="test_location"),
            repository_name="the_repo",
        ),
        instigator_name="simple_schedule",
    )
    instigator_origin_str = serialize_dagster_namedtuple(instigator_origin)
    instigator_to_job = _deserialize_json(instigator_origin_str, legacy_env)
    assert isinstance(instigator_to_job, klass)
    # ensure that the origin id is stable
    assert instigator_to_job.get_id() == instigator_origin.get_id()

    # # serialize from old code, compare against current code
    job_origin = klass(
        external_repository_origin=repo_klass(
            repository_location_origin=location_klass(
                host="localhost", port=1234, location_name="test_location"),
            repository_name="the_repo",
        ),
        job_name="simple_schedule",
    )
    job_origin_str = serialize_value(job_origin, legacy_env)
    from dagster.serdes.serdes import _WHITELIST_MAP

    job_to_instigator = deserialize_json_to_dagster_namedtuple(job_origin_str)
    assert isinstance(job_to_instigator, ExternalInstigatorOrigin)
    # ensure that the origin id is stable
    assert job_to_instigator.get_id() == job_origin.get_id()