def _mgr_fn(recon_repo):
            """Yield a list of three repository locations for the given repo.

            The list holds, in order: an out-of-process location named
            'test' pointing at ``recon_repo``, an in-process location for
            the repo described by ``empty_repo.yaml``, and an out-of-process
            location named 'empty_repo' for that same empty repo.
            """
            check.inst_param(recon_repo, 'recon_repo', ReconstructableRepository)

            yaml_path = file_relative_path(__file__, 'empty_repo.yaml')
            empty_repo = ReconstructableRepository.from_legacy_repository_yaml(yaml_path)

            # The locations are constructed in the same order they are yielded.
            test_location = PythonEnvRepositoryLocation(
                RepositoryLocationHandle.create_out_of_process_location(
                    location_name='test',
                    repository_code_pointer_dict={
                        recon_repo.get_definition().name: recon_repo.pointer
                    },
                )
            )
            in_process_location = InProcessRepositoryLocation(empty_repo)
            empty_location = PythonEnvRepositoryLocation(
                RepositoryLocationHandle.create_out_of_process_location(
                    location_name='empty_repo',
                    repository_code_pointer_dict={
                        empty_repo.get_definition().name: empty_repo.pointer
                    },
                )
            )

            yield [test_location, in_process_location, empty_location]
        def _mgr_fn(recon_repo):
            """Yield a Workspace holding three repository location handles.

            The handles are, in order: a python-env location named "test"
            (attribute "test_repo" of ``setup.py``), an in-process location
            for the repo described by ``empty_repo.yaml``, and a python-env
            location named "empty_repo" (attribute "empty_repo" of
            ``setup.py``).
            """
            check.inst_param(recon_repo, "recon_repo", ReconstructableRepository)

            empty_repo = ReconstructableRepository.from_legacy_repository_yaml(
                file_relative_path(__file__, "empty_repo.yaml")
            )

            def _python_env_handle(attribute, location_name):
                # Both python-env locations load from setup.py using the
                # current interpreter; only attribute and name differ.
                return RepositoryLocationHandle.create_python_env_location(
                    loadable_target_origin=LoadableTargetOrigin(
                        executable_path=sys.executable,
                        python_file=file_relative_path(__file__, "setup.py"),
                        attribute=attribute,
                    ),
                    location_name=location_name,
                )

            handles = [
                _python_env_handle("test_repo", "test"),
                RepositoryLocationHandle.create_in_process_location(empty_repo.pointer),
                _python_env_handle("empty_repo", "empty_repo"),
            ]

            with Workspace(handles) as workspace:
                yield workspace
Пример #3
0
def external_pipeline_from_run(pipeline_run):
    """Yield the ExternalPipeline that a pipeline run refers to.

    A repository location handle is created from the run's external
    pipeline origin and kept open (via ``with``) while the pipeline is
    yielded. The reconstructed location must contain exactly one
    repository; the pipeline is subset using the run's solid selection.
    """
    check.inst_param(pipeline_run, "pipeline_run", PipelineRun)
    pipeline_origin = check.inst(
        pipeline_run.external_pipeline_origin, ExternalPipelineOrigin
    )
    location_origin = (
        pipeline_origin.external_repository_origin.repository_location_origin
    )

    with RepositoryLocationHandle.create_from_repository_location_origin(
        location_origin
    ) as handle:
        location = RepositoryLocation.from_handle(handle)

        repositories = location.get_repositories()
        check.invariant(
            len(repositories) == 1,
            "Reconstructed repository location should have exactly one repository",
        )
        external_repo = next(iter(repositories.values()))

        selector = PipelineSelector(
            location_name=location.name,
            repository_name=external_repo.name,
            pipeline_name=pipeline_run.pipeline_name,
            solid_selection=pipeline_run.solid_selection,
        )
        subset_result = location.get_subset_external_pipeline_result(selector)

        yield ExternalPipeline(
            subset_result.external_pipeline_data,
            external_repo.handle,
        )
Пример #4
0
def location_handle_from_python_file(
    python_file,
    attribute,
    working_directory,
    user_process_api,
    location_name=None,
    executable_path=sys.executable,
):
    """Create a python-env repository location handle for a Python file.

    Args:
        python_file: Path of the file to load from (required string).
        attribute: Optional name of the attribute to load from the file.
        working_directory: Optional working directory for the load.
        user_process_api: ``UserProcessApi`` value passed to the handle.
        location_name: Optional name for the resulting location.
        executable_path: Interpreter recorded in the loadable target
            origin; defaults to the current ``sys.executable``.

    Returns:
        The result of ``RepositoryLocationHandle.create_python_env_location``.
    """
    check.str_param(python_file, "python_file")
    check.opt_str_param(attribute, "attribute")
    check.opt_str_param(working_directory, "working_directory")
    check.inst_param(user_process_api, "user_process_api", UserProcessApi)
    check.opt_str_param(location_name, "location_name")

    return RepositoryLocationHandle.create_python_env_location(
        loadable_target_origin=LoadableTargetOrigin(
            executable_path=executable_path,
            python_file=python_file,
            module_name=None,
            working_directory=working_directory,
            attribute=attribute,
        ),
        location_name=location_name,
        user_process_api=user_process_api,
    )
Пример #5
0
def load_workspace_from_config(workspace_config, yaml_path, python_user_process_api):
    """Build a Workspace from a parsed workspace config.

    A config containing a "repository" key is treated as the legacy
    repository yaml format: a warning is emitted and a single in-process
    location is created from ``yaml_path``. Otherwise one location handle
    is created per entry under "load_from".
    """
    ensure_workspace_config(workspace_config, yaml_path)
    check.inst_param(python_user_process_api, "python_user_process_api", UserProcessApi)

    if "repository" not in workspace_config:
        return Workspace(
            [
                _location_handle_from_location_config(
                    entry, yaml_path, python_user_process_api
                )
                for entry in workspace_config["load_from"]
            ]
        )

    warnings.warn(
        # link to docs once they exist
        "You are using the legacy repository yaml format. Please update your file "
        "to abide by the new workspace file format."
    )
    legacy_pointer = CodePointer.from_legacy_repository_yaml(yaml_path)
    return Workspace(
        [RepositoryLocationHandle.create_in_process_location(pointer=legacy_pointer)]
    )
Пример #6
0
def location_handle_from_python_file(python_file,
                                     attribute,
                                     location_name=None,
                                     working_directory=None):
    """Create an out-of-process location handle for a Python file.

    When ``attribute`` is truthy only that definition is loaded from the
    file; otherwise all loadable targets found in the file are used. Each
    target's definition name is mapped to a code pointer for its attribute.
    """
    check.str_param(python_file, 'python_file')
    check.opt_str_param(attribute, 'attribute')
    check.opt_str_param(location_name, 'location_name')
    check.opt_str_param(working_directory, 'working_directory')

    if attribute:
        targets = [
            LoadableTarget(
                attribute,
                load_def_in_python_file(python_file, attribute, working_directory),
            )
        ]
    else:
        targets = loadable_targets_from_python_file(python_file, working_directory)

    pointers_by_repo_name = {}
    for target in targets:
        pointer = CodePointer.from_python_file(
            python_file, target.attribute, working_directory
        )
        pointers_by_repo_name[target.target_definition.name] = pointer

    # default to the name of the repository symbol for now
    resolved_name = assign_location_name(location_name, pointers_by_repo_name)

    return RepositoryLocationHandle.create_out_of_process_location(
        repository_code_pointer_dict=pointers_by_repo_name,
        location_name=resolved_name,
    )
Пример #7
0
def legacy_get_bar_repo_handle():
    """Return the handle of "bar_repo" loaded from the legacy repository yaml.

    The repo is reconstructed from ``legacy_repository_file.yaml`` next to
    this file and exposed through an in-process repository location.
    """
    yaml_path = file_relative_path(__file__, "legacy_repository_file.yaml")
    recon_repo = ReconstructableRepository.from_legacy_repository_yaml(yaml_path)

    location_handle = RepositoryLocationHandle.create_from_repository_location_origin(
        InProcessRepositoryLocationOrigin(recon_repo)
    )
    location = RepositoryLocation.from_handle(location_handle)
    return location.get_repository("bar_repo").handle
Пример #8
0
def launch_scheduled_runs_for_schedule(instance,
                                       schedule_state,
                                       end_datetime_utc,
                                       max_catchup_runs,
                                       debug_crash_flags=None):
    """Create ticks and schedule runs for one schedule up to ``end_datetime_utc``.

    The start of the tick window depends on the latest stored tick:
    no tick -> the schedule's start timestamp; a tick still in STARTED
    status -> that tick's timestamp (it is redone); otherwise one second
    after the latest tick.

    Args:
        instance: The ``DagsterInstance`` used to read and create ticks.
        schedule_state: The ``ScheduleState`` of the schedule to process.
        end_datetime_utc: ``datetime.datetime`` upper bound of the window.
        max_catchup_runs: Maximum number of the most recent tick times to
            process. Zero or a negative value processes no ticks.
        debug_crash_flags: Optional value forwarded to
            ``_check_for_debug_crash`` and ``_schedule_run_at_time``.
    """
    check.inst_param(instance, "instance", DagsterInstance)
    check.inst_param(schedule_state, "schedule_state", ScheduleState)
    check.inst_param(end_datetime_utc, "end_datetime_utc", datetime.datetime)

    latest_tick = instance.get_latest_tick(schedule_state.schedule_origin_id)

    if not latest_tick:
        start_timestamp_utc = schedule_state.start_timestamp
    elif latest_tick.status == ScheduleTickStatus.STARTED:
        # Scheduler was interrupted while performing this tick, re-do it
        start_timestamp_utc = latest_tick.timestamp
    else:
        start_timestamp_utc = latest_tick.timestamp + 1

    start_datetime_utc = datetime.datetime.fromtimestamp(start_timestamp_utc,
                                                         tz=get_utc_timezone())

    tick_times = list(
        croniter_range(start_datetime_utc, end_datetime_utc,
                       schedule_state.cron_schedule))

    # ``tick_times[-0:]`` is the entire list and a negative budget would
    # slice from the front, so a non-positive budget must be handled
    # explicitly to mean "no catch-up runs".
    if max_catchup_runs > 0:
        catchup_tick_times = tick_times[-max_catchup_runs:]
    else:
        catchup_tick_times = []

    for schedule_time_utc in catchup_tick_times:
        if latest_tick and latest_tick.timestamp == schedule_time_utc.timestamp(
        ):
            # Reuse the existing tick instead of creating a duplicate.
            tick = latest_tick

        else:
            tick = instance.create_schedule_tick(
                ScheduleTickData(
                    schedule_origin_id=schedule_state.schedule_origin_id,
                    schedule_name=schedule_state.name,
                    timestamp=schedule_time_utc.timestamp(),
                    cron_schedule=schedule_state.cron_schedule,
                    status=ScheduleTickStatus.STARTED,
                ))

            _check_for_debug_crash(debug_crash_flags, "TICK_CREATED")

        with ScheduleTickHolder(tick, instance) as tick_holder:

            _check_for_debug_crash(debug_crash_flags, "TICK_HELD")

            with RepositoryLocationHandle.create_from_repository_origin(
                    schedule_state.origin.repository_origin,
                    instance) as repo_location_handle:
                repo_location = RepositoryLocation.from_handle(
                    repo_location_handle)
                _schedule_run_at_time(
                    instance,
                    repo_location,
                    schedule_state,
                    schedule_time_utc,
                    tick_holder,
                    debug_crash_flags,
                )
 def _mgr_fn(recon_repo):
     """Yield a Workspace with one in-process location for ``recon_repo``."""
     check.inst_param(recon_repo, "recon_repo", ReconstructableRepository)

     in_process_handle = RepositoryLocationHandle.create_in_process_location(
         recon_repo.pointer
     )
     with Workspace([in_process_handle]) as workspace:
         yield workspace
Пример #10
0
def define_test_snapshot_context():
    """Return a GraphQL context over an ephemeral instance and the main repo.

    The workspace contains a single in-process location built from the
    pointer of ``create_main_recon_repo()``.
    """
    ephemeral_instance = DagsterInstance.ephemeral()
    main_repo_handle = RepositoryLocationHandle.create_in_process_location(
        create_main_recon_repo().pointer
    )
    return DagsterGraphQLContext(
        instance=ephemeral_instance,
        workspace=Workspace([main_repo_handle]),
    )
Пример #11
0
def repository_location_handle_from_run(pipeline_run):
    """Yield a repository location handle built from a run's pipeline origin.

    Args:
        pipeline_run: The ``PipelineRun`` whose ``external_pipeline_origin``
            identifies the repository location to open.

    Yields:
        The handle created from the run's repository location origin. The
        handle is entered as a context manager, so it is exited when the
        generator finishes or is closed.
    """
    check.inst_param(pipeline_run, "pipeline_run", PipelineRun)

    external_pipeline_origin = check.inst(
        pipeline_run.external_pipeline_origin, ExternalPipelineOrigin)
    location_origin = (
        external_pipeline_origin.external_repository_origin.
        repository_location_origin)

    # Enter the handle with ``with`` so it is released once the consumer is
    # done, instead of yielding a handle that nothing ever exits.
    with RepositoryLocationHandle.create_from_repository_location_origin(
            location_origin) as handle:
        yield handle
Пример #12
0
def get_example_repository_location_handle():
    """Return a location handle named "example_repo_location" for ``repo.py``.

    The handle is created from a ``ManagedGrpcPythonEnvRepositoryLocationOrigin``
    that targets ``repo.py`` next to this file with the current interpreter.
    """
    target_origin = LoadableTargetOrigin(
        executable_path=sys.executable,
        python_file=file_relative_path(__file__, "repo.py"),
    )
    location_origin = ManagedGrpcPythonEnvRepositoryLocationOrigin(
        target_origin, "example_repo_location"
    )
    return RepositoryLocationHandle.create_from_repository_location_origin(location_origin)
Пример #13
0
def get_test_project_external_repo(container_image=None):
    """Return the "demo_execution_repo" repository via an in-process location.

    The repo is reconstructed from ``define_demo_execution_repo`` in
    ``test_pipelines/repo.py``; ``container_image`` is forwarded to
    ``ReconstructableRepository.for_file``.
    """
    recon_repo = ReconstructableRepository.for_file(
        file_relative_path(__file__, "test_pipelines/repo.py"),
        "define_demo_execution_repo",
        container_image=container_image,
    )
    location_handle = RepositoryLocationHandle.create_from_repository_location_origin(
        InProcessRepositoryLocationOrigin(recon_repo)
    )
    location = RepositoryLocation.from_handle(location_handle)
    return location.get_repository("demo_execution_repo")
Пример #14
0
def get_test_external_repo():
    """Return 'test_repository' from an out-of-process location on this file."""
    pointers = {
        'test_repository': FileCodePointer(__file__, 'test_repository'),
    }
    location_handle = RepositoryLocationHandle.create_out_of_process_location(
        location_name='test_location',
        repository_code_pointer_dict=pointers,
    )
    location = PythonEnvRepositoryLocation(location_handle)
    return location.get_repository('test_repository')
Пример #15
0
def test_user_defined_k8s_config_in_run_tags(kubeconfig_file):
    """Check that k8s config supplied in run tags reaches the created job.

    A run tagged with a user-defined container resource config is launched
    through a ``K8sRunLauncher`` whose batch API is a mock; the first call
    recorded on the mock must be ``create_namespaced_job`` with those
    resources on the job's first container.
    """
    # Fake k8s environment: the batch API client is a mock that records calls.
    batch_api_mock = mock.MagicMock()
    launcher = K8sRunLauncher(
        service_account_name="dagit-admin",
        instance_config_map="dagster-instance",
        postgres_password_secret="dagster-postgresql-secret",
        dagster_home="/opt/dagster/dagster_home",
        job_image="fake_job_image",
        load_incluster_config=False,
        kubeconfig_file=kubeconfig_file,
        k8s_client_batch_api=batch_api_mock,
    )

    # Run tags carrying the user-defined k8s config as JSON.
    expected_resources = {
        "requests": {"cpu": "250m", "memory": "64Mi"},
        "limits": {"cpu": "500m", "memory": "2560Mi"},
    }
    k8s_config = UserDefinedDagsterK8sConfig(
        container_config={"resources": expected_resources},
    )
    tags = {"dagster-k8s/config": json.dumps(k8s_config.to_dict())}

    # Fake external pipeline backed by an in-process repository location.
    recon_pipeline = reconstructable(fake_pipeline)
    recon_repo = recon_pipeline.repository
    repo_def = recon_repo.get_definition()
    location_handle = RepositoryLocationHandle.create_from_repository_location_origin(
        InProcessRepositoryLocationOrigin(recon_repo),
    )
    external_pipeline = external_pipeline_from_recon_pipeline(
        recon_pipeline,
        solid_selection=None,
        repository_handle=RepositoryHandle(
            repository_name=repo_def.name,
            repository_location_handle=location_handle,
        ),
    )

    # Launch the run against a test instance.
    with instance_for_test() as instance:
        run = create_run_for_test(instance, pipeline_name="demo_pipeline", tags=tags)
        launcher.initialize(instance)
        launcher.launch_run(None, run, external_pipeline)

    # The first recorded call must create the job with the expected resources.
    recorded_calls = batch_api_mock.method_calls
    assert len(recorded_calls) > 0
    called_name, _called_args, called_kwargs = recorded_calls[0]
    assert called_name == "create_namespaced_job"
    first_container = called_kwargs["body"].spec.template.spec.containers[0]
    assert first_container.resources == expected_resources
Пример #16
0
def define_context_for_file(python_file, fn_name, instance):
    """Return a GraphQL context with one in-process location for a file.

    The location's code pointer targets ``fn_name`` in ``python_file`` with
    no working directory.
    """
    check.inst_param(instance, "instance", DagsterInstance)

    pointer = CodePointer.from_python_file(python_file, fn_name, None)
    location_handle = RepositoryLocationHandle.create_in_process_location(pointer)
    return DagsterGraphQLContext(
        workspace=Workspace([location_handle]),
        instance=instance,
    )
Пример #17
0
def location_handle_from_python_file(python_file,
                                     attribute,
                                     user_process_api,
                                     location_name=None,
                                     working_directory=None):
    """Create a repository location handle for a Python file.

    With ``UserProcessApi.GRPC`` a process-bound gRPC server location is
    created from a loadable target origin that uses the current
    interpreter. For any other API value the targets are loaded here (the
    single ``attribute`` if truthy, otherwise every loadable target in the
    file) and an out-of-process location is created from their pointers.
    """
    check.str_param(python_file, 'python_file')
    check.opt_str_param(attribute, 'attribute')
    check.inst_param(user_process_api, 'user_process_api', UserProcessApi)
    check.opt_str_param(location_name, 'location_name')
    check.opt_str_param(working_directory, 'working_directory')

    if user_process_api == UserProcessApi.GRPC:
        grpc_origin = LoadableTargetOrigin(
            executable_path=sys.executable,
            python_file=python_file,
            module_name=None,
            working_directory=working_directory,
            attribute=attribute,
        )
        return RepositoryLocationHandle.create_process_bound_grpc_server_location(
            loadable_target_origin=grpc_origin,
            location_name=location_name,
        )

    if attribute:
        targets = [
            LoadableTarget(
                attribute,
                load_def_in_python_file(python_file, attribute, working_directory),
            )
        ]
    else:
        targets = loadable_targets_from_python_file(python_file, working_directory)

    pointers_by_repo_name = {}
    for target in targets:
        pointer = CodePointer.from_python_file(
            python_file, target.attribute, working_directory
        )
        pointers_by_repo_name[target.target_definition.name] = pointer

    # default to the name of the repository symbol for now
    resolved_name = assign_location_name(location_name, pointers_by_repo_name)

    return RepositoryLocationHandle.create_out_of_process_location(
        repository_code_pointer_dict=pointers_by_repo_name,
        location_name=resolved_name,
    )
Пример #18
0
def define_context_for_repository_yaml(path, instance):
    """Return a GraphQL context with one in-process location for a legacy yaml.

    The location's pointer comes from the repository reconstructed from the
    legacy repository yaml at ``path``.
    """
    check.inst_param(instance, "instance", DagsterInstance)

    recon_repo = ReconstructableRepository.from_legacy_repository_yaml(path)
    location_handle = RepositoryLocationHandle.create_in_process_location(
        recon_repo.pointer
    )
    return DagsterGraphQLContext(
        workspace=Workspace([location_handle]),
        instance=instance,
    )
Пример #19
0
def location_handle_from_python_file(
    python_file,
    attribute,
    user_process_api,
    location_name=None,
    working_directory=None,
    executable_path=sys.executable,
):
    """Create a repository location handle for a Python file.

    With ``UserProcessApi.GRPC`` a process-bound gRPC server location is
    created from a loadable target origin. For any other API value the
    repositories are listed with ``sync_list_repositories`` and a
    python-env location is created that maps each listed repository name
    to a code pointer for its attribute.
    """
    check.str_param(python_file, 'python_file')
    check.opt_str_param(attribute, 'attribute')
    check.inst_param(user_process_api, 'user_process_api', UserProcessApi)
    check.opt_str_param(location_name, 'location_name')
    check.opt_str_param(working_directory, 'working_directory')

    if user_process_api == UserProcessApi.GRPC:
        grpc_origin = LoadableTargetOrigin(
            executable_path=executable_path,
            python_file=python_file,
            module_name=None,
            working_directory=working_directory,
            attribute=attribute,
        )
        return RepositoryLocationHandle.create_process_bound_grpc_server_location(
            loadable_target_origin=grpc_origin,
            location_name=location_name,
        )

    listing = sync_list_repositories(
        executable_path=executable_path,
        python_file=python_file,
        module_name=None,
        working_directory=working_directory,
        attribute=attribute,
    )
    pointers_by_repo_name = {
        symbol.repository_name: CodePointer.from_python_file(
            python_file, symbol.attribute, working_directory
        )
        for symbol in listing.repository_symbols
    }
    return RepositoryLocationHandle.create_python_env_location(
        executable_path=executable_path,
        location_name=location_name,
        repository_code_pointer_dict=pointers_by_repo_name,
    )
Пример #20
0
def get_test_external_repo():
    """Return "test_repository" from a python-env location on this file."""
    target_origin = LoadableTargetOrigin(
        executable_path=sys.executable,
        python_file=__file__,
        attribute="test_repository",
    )
    location_handle = RepositoryLocationHandle.create_python_env_location(
        loadable_target_origin=target_origin,
        location_name="test_location",
    )
    location = PythonEnvRepositoryLocation(location_handle)
    return location.get_repository("test_repository")
Пример #21
0
def cli_api_repo():
    """Yield "the_repo" from a python-env location targeting this file."""
    target_origin = LoadableTargetOrigin(
        executable_path=sys.executable,
        python_file=__file__,
        attribute="the_repo",
    )
    location_handle = RepositoryLocationHandle.create_python_env_location(
        loadable_target_origin=target_origin,
        location_name="test_location",
    )
    location = PythonEnvRepositoryLocation(location_handle)

    yield location.get_repository("the_repo")
Пример #22
0
def get_test_external_repo():
    """Yield "test_repository" while its location handle is held open.

    The handle is created from a ``ManagedGrpcPythonEnvRepositoryLocationOrigin``
    named "test_location" that targets ``test_repository`` in this file.
    """
    location_origin = ManagedGrpcPythonEnvRepositoryLocationOrigin(
        loadable_target_origin=LoadableTargetOrigin(
            executable_path=sys.executable,
            python_file=__file__,
            attribute="test_repository",
        ),
        location_name="test_location",
    )

    with RepositoryLocationHandle.create_from_repository_location_origin(
        location_origin
    ) as handle:
        location = RepositoryLocation.from_handle(handle)
        yield location.get_repository("test_repository")
Пример #23
0
def get_main_external_repo():
    """Yield the main repository while its location handle is held open.

    The location origin targets the ``main_repo_name()`` attribute of
    ``setup.py`` next to this file and is named ``main_repo_location_name()``.
    """
    location_origin = location_origin_from_python_file(
        python_file=file_relative_path(__file__, "setup.py"),
        attribute=main_repo_name(),
        working_directory=None,
        location_name=main_repo_location_name(),
    )

    with RepositoryLocationHandle.create_from_repository_location_origin(
        location_origin
    ) as handle:
        location = RepositoryLocation.from_handle(handle)
        yield location.get_repository(main_repo_name())
Пример #24
0
def default_repo():
    """Yield "the_repo" while its location handle is held open.

    The loadable target is this file, with no attribute given and the
    current working directory as the working directory.
    """
    location_origin = ManagedGrpcPythonEnvRepositoryLocationOrigin(
        loadable_target_origin=LoadableTargetOrigin(
            executable_path=sys.executable,
            python_file=__file__,
            working_directory=os.getcwd(),
        ),
        location_name="test_location",
    )

    with RepositoryLocationHandle.create_from_repository_location_origin(
        location_origin
    ) as handle:
        location = RepositoryLocation.from_handle(handle)
        yield location.get_repository("the_repo")
Пример #25
0
def test_dagster_out_of_process_location():
    """An out-of-process location on ``setup.py`` exposes 'test_repo'."""
    pointers = {
        'test_repo': FileCodePointer(
            file_relative_path(__file__, 'setup.py'), 'test_repo'
        ),
    }
    location_handle = RepositoryLocationHandle.create_out_of_process_location(
        location_name='test_location',
        repository_code_pointer_dict=pointers,
    )
    location = PythonEnvRepositoryLocation(location_handle)
    assert location.get_repository('test_repo')
Пример #26
0
def test_dagster_out_of_process_location():
    """A python-env location on ``setup.py`` exposes "test_repo"."""
    target_origin = LoadableTargetOrigin(
        executable_path=sys.executable,
        python_file=file_relative_path(__file__, "setup.py"),
        attribute="test_repo",
    )
    location_handle = RepositoryLocationHandle.create_python_env_location(
        location_name="test_location",
        loadable_target_origin=target_origin,
    )
    location = PythonEnvRepositoryLocation(location_handle)
    assert location.get_repository("test_repo")
Пример #27
0
def get_test_external_repo():
    """Return "test_repository" from a python-env location using the CLI api."""
    target_origin = LoadableTargetOrigin(
        executable_path=sys.executable,
        python_file=__file__,
        attribute="test_repository",
    )
    location_handle = RepositoryLocationHandle.create_python_env_location(
        loadable_target_origin=target_origin,
        location_name="test_location",
        user_process_api=UserProcessApi.CLI,
    )
    location = RepositoryLocation.from_handle(location_handle)
    return location.get_repository("test_repository")
Пример #28
0
    def __init__(self, recon_repo):
        """Set up an in-process location around a reconstructable repository.

        Stores the repo, an in-process location handle for its pointer, the
        external repository built from its definition, and a one-entry
        mapping from that external repository's name to it.
        """
        self._recon_repo = check.inst_param(recon_repo, 'recon_repo', ReconstructableRepository)
        self._handle = RepositoryLocationHandle.create_in_process_location(recon_repo.pointer)

        definition = recon_repo.get_definition()
        repository_handle = RepositoryHandle(
            repository_name=definition.name,
            repository_location_handle=self._handle,
        )
        self._external_repo = external_repo_from_def(definition, repository_handle)
        self._repositories = {self._external_repo.name: self._external_repo}
Пример #29
0
def launch_scheduled_runs(
    instance,
    logger,
    end_datetime_utc,
    max_catchup_runs=_DEFAULT_MAX_CATCHUP_RUNS,
    debug_crash_flags=None,
):
    """Launch runs for every stored schedule whose status is RUNNING.

    Raises ``DagsterInvariantViolationError`` unless the instance's
    scheduler is a ``DagsterCommandLineScheduler``. Each schedule is
    processed inside its own repository location handle; any exception
    raised for one schedule is logged as an error and the loop moves on to
    the next schedule.
    """
    running_schedules = [
        state
        for state in instance.all_stored_schedule_state()
        if state.status == ScheduleStatus.RUNNING
    ]

    if not isinstance(instance.scheduler, DagsterCommandLineScheduler):
        raise DagsterInvariantViolationError(
            """Your dagster.yaml must be configured as follows in order to use dagster-scheduler:
scheduler:
  module: dagster.core.scheduler
  class: DagsterCommandLineScheduler
        """,
        )

    if not running_schedules:
        logger.info("Not checking for any runs since no schedules have been started.")
        return

    joined_names = ", ".join([state.name for state in running_schedules])
    logger.info(
        "Checking for new runs for the following schedules: {schedule_names}".format(
            schedule_names=joined_names,
        )
    )

    for schedule_state in running_schedules:
        try:
            with RepositoryLocationHandle.create_from_repository_origin(
                schedule_state.origin.repository_origin, instance
            ) as repo_location_handle:
                repo_location = RepositoryLocation.from_handle(repo_location_handle)

                # Crash flags, when given, are looked up per schedule name.
                if debug_crash_flags:
                    schedule_crash_flags = debug_crash_flags.get(schedule_state.name)
                else:
                    schedule_crash_flags = None

                launch_scheduled_runs_for_schedule(
                    instance,
                    logger,
                    schedule_state,
                    repo_location,
                    end_datetime_utc,
                    max_catchup_runs,
                    schedule_crash_flags,
                )
        except Exception:  # pylint: disable=broad-except
            # One failing schedule must not stop the remaining schedules.
            error_text = serializable_error_info_from_exc_info(sys.exc_info()).to_string()
            logger.error(
                "Scheduler failed for {schedule_name} : {error_info}".format(
                    schedule_name=schedule_state.name,
                    error_info=error_text,
                )
            )
Пример #30
0
def define_out_of_process_context(python_file, fn_name, instance):
    """Return a GraphQL context with one out-of-process location for a file.

    The location is named 'test_location' and maps ``fn_name`` to a file
    code pointer for that name in ``python_file``.
    """
    check.inst_param(instance, 'instance', DagsterInstance)

    pointers = {fn_name: FileCodePointer(python_file, fn_name)}
    location_handle = RepositoryLocationHandle.create_out_of_process_location(
        'test_location', pointers
    )
    return DagsterGraphQLContext(
        locations=[PythonEnvRepositoryLocation(location_handle)],
        instance=instance,
    )