def _mgr_fn(recon_repo):
    '''Yield a list of three repository locations built around ``recon_repo``.

    In order, the list contains:

    1. a ``PythonEnvRepositoryLocation`` named 'test' whose handle points at
       ``recon_repo``,
    2. an ``InProcessRepositoryLocation`` for the repository described by
       ``empty_repo.yaml`` (resolved relative to this file),
    3. a ``PythonEnvRepositoryLocation`` named 'empty_repo' whose handle points
       at that same empty repository.
    '''
    check.inst_param(recon_repo, 'recon_repo', ReconstructableRepository)

    empty_yaml_path = file_relative_path(__file__, 'empty_repo.yaml')
    empty_repo = ReconstructableRepository.from_legacy_repository_yaml(empty_yaml_path)

    test_handle = RepositoryLocationHandle.create_out_of_process_location(
        location_name='test',
        repository_code_pointer_dict={recon_repo.get_definition().name: recon_repo.pointer},
    )
    test_location = PythonEnvRepositoryLocation(test_handle)

    in_process_empty_location = InProcessRepositoryLocation(empty_repo)

    empty_handle = RepositoryLocationHandle.create_out_of_process_location(
        location_name='empty_repo',
        repository_code_pointer_dict={empty_repo.get_definition().name: empty_repo.pointer},
    )
    out_of_process_empty_location = PythonEnvRepositoryLocation(empty_handle)

    yield [test_location, in_process_empty_location, out_of_process_empty_location]
def _mgr_fn(recon_repo):
    """Yield a ``Workspace`` holding three repository location handles.

    The handles are, in order: a python-env location named "test" targeting
    the ``test_repo`` attribute of ``setup.py``, an in-process location for the
    repository described by ``empty_repo.yaml``, and a python-env location
    named "empty_repo" targeting the ``empty_repo`` attribute of ``setup.py``.
    The workspace is entered as a context manager for the duration of the yield.
    """
    check.inst_param(recon_repo, "recon_repo", ReconstructableRepository)

    empty_repo = ReconstructableRepository.from_legacy_repository_yaml(
        file_relative_path(__file__, "empty_repo.yaml")
    )

    test_handle = RepositoryLocationHandle.create_python_env_location(
        loadable_target_origin=LoadableTargetOrigin(
            executable_path=sys.executable,
            python_file=file_relative_path(__file__, "setup.py"),
            attribute="test_repo",
        ),
        location_name="test",
    )
    in_process_handle = RepositoryLocationHandle.create_in_process_location(empty_repo.pointer)
    empty_handle = RepositoryLocationHandle.create_python_env_location(
        loadable_target_origin=LoadableTargetOrigin(
            executable_path=sys.executable,
            python_file=file_relative_path(__file__, "setup.py"),
            attribute="empty_repo",
        ),
        location_name="empty_repo",
    )

    with Workspace([test_handle, in_process_handle, empty_handle]) as workspace:
        yield workspace
def external_pipeline_from_run(pipeline_run):
    """Yield the ``ExternalPipeline`` corresponding to ``pipeline_run``.

    A repository location handle is created from the run's repository location
    origin and kept open (via ``with``) while the pipeline is yielded. The
    location must expose exactly one repository; the pipeline is subset using
    the run's ``solid_selection``.

    Args:
        pipeline_run: a ``PipelineRun`` whose ``external_pipeline_origin`` is an
            ``ExternalPipelineOrigin``.
    """
    check.inst_param(pipeline_run, "pipeline_run", PipelineRun)
    pipeline_origin = check.inst(pipeline_run.external_pipeline_origin, ExternalPipelineOrigin)
    location_origin = pipeline_origin.external_repository_origin.repository_location_origin

    with RepositoryLocationHandle.create_from_repository_location_origin(
        location_origin
    ) as location_handle:
        location = RepositoryLocation.from_handle(location_handle)

        repositories = location.get_repositories()
        check.invariant(
            len(repositories) == 1,
            "Reconstructed repository location should have exactly one repository",
        )
        only_repo = next(iter(repositories.values()))

        selector = PipelineSelector(
            location_name=location.name,
            repository_name=only_repo.name,
            pipeline_name=pipeline_run.pipeline_name,
            solid_selection=pipeline_run.solid_selection,
        )
        subset_result = location.get_subset_external_pipeline_result(selector)

        yield ExternalPipeline(subset_result.external_pipeline_data, only_repo.handle)
def location_handle_from_python_file(
    python_file,
    attribute,
    working_directory,
    user_process_api,
    location_name=None,
    executable_path=sys.executable,
):
    """Create a python-env repository location handle for a Python file.

    Args:
        python_file: path of the file to load (required string).
        attribute: optional name of the attribute to load from the file.
        working_directory: optional working directory for the load.
        user_process_api: ``UserProcessApi`` value forwarded to the handle.
        location_name: optional name for the resulting location.
        executable_path: Python executable recorded in the loadable target
            origin; defaults to the current interpreter.

    Returns:
        The handle produced by
        ``RepositoryLocationHandle.create_python_env_location``.
    """
    check.str_param(python_file, "python_file")
    check.opt_str_param(attribute, "attribute")
    check.opt_str_param(working_directory, "working_directory")
    check.inst_param(user_process_api, "user_process_api", UserProcessApi)
    check.opt_str_param(location_name, "location_name")

    return RepositoryLocationHandle.create_python_env_location(
        loadable_target_origin=LoadableTargetOrigin(
            executable_path=executable_path,
            python_file=python_file,
            module_name=None,
            working_directory=working_directory,
            attribute=attribute,
        ),
        location_name=location_name,
        user_process_api=user_process_api,
    )
def load_workspace_from_config(workspace_config, yaml_path, python_user_process_api):
    """Build a ``Workspace`` from an already-parsed workspace config.

    A config containing a "repository" key is treated as the legacy repository
    yaml format: a warning is emitted and the workspace holds a single
    in-process location loaded from ``yaml_path``. Otherwise one location
    handle is created per entry of ``workspace_config["load_from"]``.

    Args:
        workspace_config: parsed config, validated by ``ensure_workspace_config``.
        yaml_path: path of the yaml file the config came from.
        python_user_process_api: ``UserProcessApi`` forwarded to each location.
    """
    ensure_workspace_config(workspace_config, yaml_path)
    check.inst_param(python_user_process_api, "python_user_process_api", UserProcessApi)

    if "repository" not in workspace_config:
        return Workspace(
            [
                _location_handle_from_location_config(
                    location_config, yaml_path, python_user_process_api
                )
                for location_config in workspace_config["load_from"]
            ]
        )

    warnings.warn(
        # link to docs once they exist
        "You are using the legacy repository yaml format. Please update your file "
        "to abide by the new workspace file format."
    )
    legacy_pointer = CodePointer.from_legacy_repository_yaml(yaml_path)
    return Workspace([RepositoryLocationHandle.create_in_process_location(pointer=legacy_pointer)])
def location_handle_from_python_file(python_file, attribute, location_name=None,
                                     working_directory=None):
    '''Create an out-of-process location handle for the targets in a Python file.

    When ``attribute`` is given, only that definition is loaded; otherwise all
    loadable targets found in the file are used. Each target contributes one
    entry (definition name -> code pointer) to the handle's pointer dict.

    Args:
        python_file: path of the file to load (required string).
        attribute: optional attribute name to load from the file.
        location_name: optional location name, resolved through
            ``assign_location_name``.
        working_directory: optional working directory for the load.
    '''
    check.str_param(python_file, 'python_file')
    check.opt_str_param(attribute, 'attribute')
    check.opt_str_param(location_name, 'location_name')
    check.opt_str_param(working_directory, 'working_directory')

    if attribute:
        targets = [
            LoadableTarget(
                attribute, load_def_in_python_file(python_file, attribute, working_directory)
            )
        ]
    else:
        targets = loadable_targets_from_python_file(python_file, working_directory)

    pointers_by_name = {
        target.target_definition.name: CodePointer.from_python_file(
            python_file, target.attribute, working_directory
        )
        for target in targets
    }

    return RepositoryLocationHandle.create_out_of_process_location(
        repository_code_pointer_dict=pointers_by_name,
        # default to the name of the repository symbol for now
        location_name=assign_location_name(location_name, pointers_by_name),
    )
def legacy_get_bar_repo_handle():
    """Return the handle of "bar_repo" loaded from ``legacy_repository_file.yaml``.

    The yaml file is resolved relative to this file and loaded through an
    in-process repository location.
    """
    yaml_path = file_relative_path(__file__, "legacy_repository_file.yaml")
    recon_repo = ReconstructableRepository.from_legacy_repository_yaml(yaml_path)

    location_handle = RepositoryLocationHandle.create_from_repository_location_origin(
        InProcessRepositoryLocationOrigin(recon_repo)
    )
    bar_repo = RepositoryLocation.from_handle(location_handle).get_repository("bar_repo")
    return bar_repo.handle
def launch_scheduled_runs_for_schedule(
    instance, schedule_state, end_datetime_utc, max_catchup_runs, debug_crash_flags=None
):
    """Create ticks and schedule runs for one schedule up to ``end_datetime_utc``.

    The starting point is derived from the schedule's latest tick:

    * no tick yet: the schedule's ``start_timestamp``;
    * latest tick still ``STARTED``: that tick's timestamp, so it is redone;
    * otherwise: one second after the latest tick's timestamp.

    Tick times between the start and ``end_datetime_utc`` come from
    ``croniter_range`` using the schedule's cron string.

    Args:
        instance: the ``DagsterInstance`` used to read and create ticks.
        schedule_state: the ``ScheduleState`` being processed.
        end_datetime_utc: ``datetime.datetime`` upper bound for tick times.
        max_catchup_runs: only the trailing ``max_catchup_runs`` tick times are
            processed (the slice is ``[-max_catchup_runs:]``, so a value of 0
            selects every tick time).
        debug_crash_flags: optional flags forwarded to ``_check_for_debug_crash``
            and ``_schedule_run_at_time``.
    """
    check.inst_param(instance, "instance", DagsterInstance)
    check.inst_param(schedule_state, "schedule_state", ScheduleState)
    check.inst_param(end_datetime_utc, "end_datetime_utc", datetime.datetime)

    latest_tick = instance.get_latest_tick(schedule_state.schedule_origin_id)

    if latest_tick:
        if latest_tick.status == ScheduleTickStatus.STARTED:
            # Scheduler was interrupted while performing this tick, re-do it
            start_timestamp_utc = latest_tick.timestamp
        else:
            start_timestamp_utc = latest_tick.timestamp + 1
    else:
        start_timestamp_utc = schedule_state.start_timestamp

    start_datetime_utc = datetime.datetime.fromtimestamp(
        start_timestamp_utc, tz=get_utc_timezone()
    )

    all_tick_times = list(
        croniter_range(start_datetime_utc, end_datetime_utc, schedule_state.cron_schedule)
    )
    catchup_tick_times = all_tick_times[-max_catchup_runs:]

    for schedule_time_utc in catchup_tick_times:
        # Reuse the interrupted tick when it matches this time; otherwise record a new one.
        if latest_tick and latest_tick.timestamp == schedule_time_utc.timestamp():
            tick = latest_tick
        else:
            tick_data = ScheduleTickData(
                schedule_origin_id=schedule_state.schedule_origin_id,
                schedule_name=schedule_state.name,
                timestamp=schedule_time_utc.timestamp(),
                cron_schedule=schedule_state.cron_schedule,
                status=ScheduleTickStatus.STARTED,
            )
            tick = instance.create_schedule_tick(tick_data)
            _check_for_debug_crash(debug_crash_flags, "TICK_CREATED")

        with ScheduleTickHolder(tick, instance) as tick_holder:
            _check_for_debug_crash(debug_crash_flags, "TICK_HELD")

            with RepositoryLocationHandle.create_from_repository_origin(
                schedule_state.origin.repository_origin, instance
            ) as location_handle:
                location = RepositoryLocation.from_handle(location_handle)
                _schedule_run_at_time(
                    instance,
                    location,
                    schedule_state,
                    schedule_time_utc,
                    tick_holder,
                    debug_crash_flags,
                )
def _mgr_fn(recon_repo):
    """Yield a ``Workspace`` with a single in-process location for ``recon_repo``.

    The workspace is entered as a context manager for the duration of the yield.
    """
    check.inst_param(recon_repo, "recon_repo", ReconstructableRepository)

    in_process_handle = RepositoryLocationHandle.create_in_process_location(recon_repo.pointer)
    with Workspace([in_process_handle]) as workspace:
        yield workspace
def define_test_snapshot_context():
    """Return a ``DagsterGraphQLContext`` for snapshot tests.

    The context uses an ephemeral ``DagsterInstance`` and a workspace with one
    in-process location pointing at the repo from ``create_main_recon_repo()``.
    """
    instance = DagsterInstance.ephemeral()
    main_pointer = create_main_recon_repo().pointer
    workspace = Workspace([RepositoryLocationHandle.create_in_process_location(main_pointer)])
    return DagsterGraphQLContext(instance=instance, workspace=workspace)
def repository_location_handle_from_run(pipeline_run):
    """Yield the repository location handle for the location ``pipeline_run`` came from.

    The handle is created from the repository location origin recorded on the
    run's external pipeline origin.

    Args:
        pipeline_run: a ``PipelineRun`` whose ``external_pipeline_origin`` is an
            ``ExternalPipelineOrigin``.

    Yields:
        The ``RepositoryLocationHandle`` for the run's repository location.
    """
    check.inst_param(pipeline_run, "pipeline_run", PipelineRun)
    external_pipeline_origin = check.inst(
        pipeline_run.external_pipeline_origin, ExternalPipelineOrigin
    )
    location_origin = (
        external_pipeline_origin.external_repository_origin.repository_location_origin
    )

    # Enter the handle as a context manager (the same way external_pipeline_from_run
    # does) so it is exited once the consumer is done, instead of being yielded
    # bare and never released.
    # NOTE(review): assumes this generator is wrapped by contextlib.contextmanager
    # via a decorator outside this view -- confirm.
    with RepositoryLocationHandle.create_from_repository_location_origin(
        location_origin
    ) as repo_location_handle:
        yield repo_location_handle
def get_example_repository_location_handle():
    """Return a location handle for ``repo.py`` (resolved relative to this file).

    The handle is created from a ``ManagedGrpcPythonEnvRepositoryLocationOrigin``
    named "example_repo_location" that uses the current Python executable.
    """
    target_origin = LoadableTargetOrigin(
        executable_path=sys.executable,
        python_file=file_relative_path(__file__, "repo.py"),
    )
    location_origin = ManagedGrpcPythonEnvRepositoryLocationOrigin(
        target_origin, "example_repo_location"
    )
    return RepositoryLocationHandle.create_from_repository_location_origin(location_origin)
def get_test_project_external_repo(container_image=None):
    """Return the external "demo_execution_repo" repository of the test project.

    The repository is reconstructed from ``define_demo_execution_repo`` in
    ``test_pipelines/repo.py`` (relative to this file) and loaded through an
    in-process repository location.

    Args:
        container_image: optional value forwarded to
            ``ReconstructableRepository.for_file``.
    """
    recon_repo = ReconstructableRepository.for_file(
        file_relative_path(__file__, "test_pipelines/repo.py"),
        "define_demo_execution_repo",
        container_image=container_image,
    )
    location_handle = RepositoryLocationHandle.create_from_repository_location_origin(
        InProcessRepositoryLocationOrigin(recon_repo)
    )
    location = RepositoryLocation.from_handle(location_handle)
    return location.get_repository("demo_execution_repo")
def get_test_external_repo():
    '''Return the external 'test_repository' defined in this file.

    It is loaded through a ``PythonEnvRepositoryLocation`` named
    'test_location' whose handle points at this file's ``test_repository``.
    '''
    code_pointers = {'test_repository': FileCodePointer(__file__, 'test_repository')}
    location_handle = RepositoryLocationHandle.create_out_of_process_location(
        location_name='test_location',
        repository_code_pointer_dict=code_pointers,
    )
    location = PythonEnvRepositoryLocation(location_handle)
    return location.get_repository('test_repository')
def test_user_defined_k8s_config_in_run_tags(kubeconfig_file):
    """Check that k8s config supplied through run tags reaches the created job.

    A ``K8sRunLauncher`` backed by a mocked batch API launches a run tagged with
    a user-defined container resource config; the first recorded API call must
    be ``create_namespaced_job`` with those resources on its first container.
    """
    # Construct a K8s run launcher in a fake k8s environment.
    batch_api_mock = mock.MagicMock()
    run_launcher = K8sRunLauncher(
        service_account_name="dagit-admin",
        instance_config_map="dagster-instance",
        postgres_password_secret="dagster-postgresql-secret",
        dagster_home="/opt/dagster/dagster_home",
        job_image="fake_job_image",
        load_incluster_config=False,
        kubeconfig_file=kubeconfig_file,
        k8s_client_batch_api=batch_api_mock,
    )

    # Construct Dagster run tags with user defined k8s config.
    expected_resources = {
        "requests": {"cpu": "250m", "memory": "64Mi"},
        "limits": {"cpu": "500m", "memory": "2560Mi"},
    }
    k8s_config = UserDefinedDagsterK8sConfig(
        container_config={"resources": expected_resources},
    )
    tags = {"dagster-k8s/config": json.dumps(k8s_config.to_dict())}

    # Create fake external pipeline.
    recon_pipeline = reconstructable(fake_pipeline)
    recon_repo = recon_pipeline.repository
    repo_def = recon_repo.get_definition()
    location_handle = RepositoryLocationHandle.create_from_repository_location_origin(
        InProcessRepositoryLocationOrigin(recon_repo),
    )
    repo_handle = RepositoryHandle(
        repository_name=repo_def.name,
        repository_location_handle=location_handle,
    )
    fake_external_pipeline = external_pipeline_from_recon_pipeline(
        recon_pipeline,
        solid_selection=None,
        repository_handle=repo_handle,
    )

    # Launch the run in a fake Dagster instance.
    with instance_for_test() as instance:
        run = create_run_for_test(instance, pipeline_name="demo_pipeline", tags=tags)
        run_launcher.initialize(instance)
        run_launcher.launch_run(None, run, fake_external_pipeline)

    # Check that user defined k8s config was passed down to the k8s job.
    recorded_calls = batch_api_mock.method_calls
    assert len(recorded_calls) > 0
    method_name, _args, kwargs = recorded_calls[0]
    assert method_name == "create_namespaced_job"

    first_container = kwargs["body"].spec.template.spec.containers[0]
    assert first_container.resources == expected_resources
def define_context_for_file(python_file, fn_name, instance):
    """Return a ``DagsterGraphQLContext`` for a target defined in a Python file.

    Args:
        python_file: path of the file containing the target.
        fn_name: name of the attribute in ``python_file`` to point at.
        instance: the ``DagsterInstance`` the context should use.

    Returns:
        A context whose workspace holds one in-process location for the target.
    """
    check.inst_param(instance, "instance", DagsterInstance)

    pointer = CodePointer.from_python_file(python_file, fn_name, None)
    workspace = Workspace([RepositoryLocationHandle.create_in_process_location(pointer)])
    return DagsterGraphQLContext(workspace=workspace, instance=instance)
def location_handle_from_python_file(python_file, attribute, user_process_api,
                                     location_name=None, working_directory=None):
    '''Create a repository location handle for the targets in a Python file.

    With ``UserProcessApi.GRPC`` a process-bound gRPC server location is created
    from a ``LoadableTargetOrigin`` using the current Python executable. For any
    other api value the file is loaded here and an out-of-process location is
    created with one code pointer per loadable target (only ``attribute`` when
    it is given).

    Args:
        python_file: path of the file to load (required string).
        attribute: optional attribute name to load from the file.
        user_process_api: ``UserProcessApi`` selecting the kind of handle.
        location_name: optional location name.
        working_directory: optional working directory for the load.
    '''
    check.str_param(python_file, 'python_file')
    check.opt_str_param(attribute, 'attribute')
    check.inst_param(user_process_api, 'user_process_api', UserProcessApi)
    check.opt_str_param(location_name, 'location_name')
    check.opt_str_param(working_directory, 'working_directory')

    if user_process_api == UserProcessApi.GRPC:
        grpc_target_origin = LoadableTargetOrigin(
            executable_path=sys.executable,
            python_file=python_file,
            module_name=None,
            working_directory=working_directory,
            attribute=attribute,
        )
        return RepositoryLocationHandle.create_process_bound_grpc_server_location(
            loadable_target_origin=grpc_target_origin,
            location_name=location_name,
        )

    if attribute:
        targets = [
            LoadableTarget(
                attribute, load_def_in_python_file(python_file, attribute, working_directory)
            )
        ]
    else:
        targets = loadable_targets_from_python_file(python_file, working_directory)

    pointers_by_name = {
        target.target_definition.name: CodePointer.from_python_file(
            python_file, target.attribute, working_directory
        )
        for target in targets
    }

    return RepositoryLocationHandle.create_out_of_process_location(
        repository_code_pointer_dict=pointers_by_name,
        # default to the name of the repository symbol for now
        location_name=assign_location_name(location_name, pointers_by_name),
    )
def define_context_for_repository_yaml(path, instance):
    """Return a ``DagsterGraphQLContext`` for a legacy repository yaml file.

    Args:
        path: path of the legacy repository yaml file.
        instance: the ``DagsterInstance`` the context should use.

    Returns:
        A context whose workspace holds one in-process location for the
        repository described by ``path``.
    """
    check.inst_param(instance, "instance", DagsterInstance)

    recon_repo = ReconstructableRepository.from_legacy_repository_yaml(path)
    location_handle = RepositoryLocationHandle.create_in_process_location(recon_repo.pointer)
    return DagsterGraphQLContext(workspace=Workspace([location_handle]), instance=instance)
def location_handle_from_python_file(
    python_file,
    attribute,
    user_process_api,
    location_name=None,
    working_directory=None,
    executable_path=sys.executable,
):
    '''Create a repository location handle for the repositories in a Python file.

    With ``UserProcessApi.GRPC`` a process-bound gRPC server location is created
    from a ``LoadableTargetOrigin``. For any other api value the repositories
    are listed with ``sync_list_repositories`` and a python-env location is
    created with one code pointer per returned repository symbol.

    Args:
        python_file: path of the file to load (required string).
        attribute: optional attribute name to load from the file.
        user_process_api: ``UserProcessApi`` selecting the kind of handle.
        location_name: optional location name.
        working_directory: optional working directory for the load.
        executable_path: Python executable to use; defaults to the current
            interpreter.
    '''
    check.str_param(python_file, 'python_file')
    check.opt_str_param(attribute, 'attribute')
    check.inst_param(user_process_api, 'user_process_api', UserProcessApi)
    check.opt_str_param(location_name, 'location_name')
    check.opt_str_param(working_directory, 'working_directory')

    if user_process_api == UserProcessApi.GRPC:
        grpc_target_origin = LoadableTargetOrigin(
            executable_path=executable_path,
            python_file=python_file,
            module_name=None,
            working_directory=working_directory,
            attribute=attribute,
        )
        return RepositoryLocationHandle.create_process_bound_grpc_server_location(
            loadable_target_origin=grpc_target_origin,
            location_name=location_name,
        )

    listing = sync_list_repositories(
        executable_path=executable_path,
        python_file=python_file,
        module_name=None,
        working_directory=working_directory,
        attribute=attribute,
    )
    pointers_by_repo_name = {
        symbol.repository_name: CodePointer.from_python_file(
            python_file, symbol.attribute, working_directory
        )
        for symbol in listing.repository_symbols
    }
    return RepositoryLocationHandle.create_python_env_location(
        executable_path=executable_path,
        location_name=location_name,
        repository_code_pointer_dict=pointers_by_repo_name,
    )
def get_test_external_repo():
    """Return the external "test_repository" defined in this file.

    It is loaded through a ``PythonEnvRepositoryLocation`` named
    "test_location" that targets this file's ``test_repository`` attribute with
    the current Python executable.
    """
    target_origin = LoadableTargetOrigin(
        executable_path=sys.executable,
        python_file=__file__,
        attribute="test_repository",
    )
    location_handle = RepositoryLocationHandle.create_python_env_location(
        loadable_target_origin=target_origin,
        location_name="test_location",
    )
    return PythonEnvRepositoryLocation(location_handle).get_repository("test_repository")
def cli_api_repo():
    """Yield the external "the_repo" repository defined in this file.

    It is loaded through a ``PythonEnvRepositoryLocation`` named
    "test_location" that targets this file's ``the_repo`` attribute with the
    current Python executable.
    """
    location_handle = RepositoryLocationHandle.create_python_env_location(
        loadable_target_origin=LoadableTargetOrigin(
            executable_path=sys.executable,
            python_file=__file__,
            attribute="the_repo",
        ),
        location_name="test_location",
    )
    location = PythonEnvRepositoryLocation(location_handle)
    yield location.get_repository("the_repo")
def get_test_external_repo():
    """Yield the external "test_repository" defined in this file.

    The repository is loaded from a managed-gRPC python-env location named
    "test_location"; the location handle stays open (via ``with``) while the
    repository is yielded.
    """
    target_origin = LoadableTargetOrigin(
        executable_path=sys.executable,
        python_file=__file__,
        attribute="test_repository",
    )
    location_origin = ManagedGrpcPythonEnvRepositoryLocationOrigin(
        loadable_target_origin=target_origin,
        location_name="test_location",
    )
    with RepositoryLocationHandle.create_from_repository_location_origin(
        location_origin
    ) as handle:
        location = RepositoryLocation.from_handle(handle)
        yield location.get_repository("test_repository")
def get_main_external_repo():
    """Yield the main external repository loaded from ``setup.py``.

    The location origin is built by ``location_origin_from_python_file`` for
    ``setup.py`` (relative to this file) using ``main_repo_name()`` as the
    attribute and ``main_repo_location_name()`` as the location name. The
    handle stays open (via ``with``) while the repository is yielded.
    """
    location_origin = location_origin_from_python_file(
        python_file=file_relative_path(__file__, "setup.py"),
        attribute=main_repo_name(),
        working_directory=None,
        location_name=main_repo_location_name(),
    )
    with RepositoryLocationHandle.create_from_repository_location_origin(
        location_origin
    ) as handle:
        location = RepositoryLocation.from_handle(handle)
        yield location.get_repository(main_repo_name())
def default_repo():
    """Yield the external "the_repo" repository defined in this file.

    The loadable target names no attribute and uses the current working
    directory. The repository is served from a managed-gRPC python-env location
    named "test_location", whose handle stays open (via ``with``) while the
    repository is yielded.
    """
    location_origin = ManagedGrpcPythonEnvRepositoryLocationOrigin(
        loadable_target_origin=LoadableTargetOrigin(
            executable_path=sys.executable,
            python_file=__file__,
            working_directory=os.getcwd(),
        ),
        location_name="test_location",
    )
    with RepositoryLocationHandle.create_from_repository_location_origin(
        location_origin
    ) as handle:
        location = RepositoryLocation.from_handle(handle)
        yield location.get_repository("the_repo")
def test_dagster_out_of_process_location():
    '''An out-of-process location for ``setup.py`` exposes 'test_repo'.'''
    setup_pointer = FileCodePointer(file_relative_path(__file__, 'setup.py'), 'test_repo')
    location_handle = RepositoryLocationHandle.create_out_of_process_location(
        location_name='test_location',
        repository_code_pointer_dict={'test_repo': setup_pointer},
    )
    env = PythonEnvRepositoryLocation(location_handle)

    assert env.get_repository('test_repo')
def test_dagster_out_of_process_location():
    """A python-env location for ``setup.py`` exposes "test_repo"."""
    target_origin = LoadableTargetOrigin(
        executable_path=sys.executable,
        python_file=file_relative_path(__file__, "setup.py"),
        attribute="test_repo",
    )
    location_handle = RepositoryLocationHandle.create_python_env_location(
        location_name="test_location",
        loadable_target_origin=target_origin,
    )
    env = PythonEnvRepositoryLocation(location_handle)

    assert env.get_repository("test_repo")
def get_test_external_repo():
    """Return the external "test_repository" defined in this file.

    It is loaded from a python-env location named "test_location" that is
    created with ``UserProcessApi.CLI`` and the current Python executable.
    """
    target_origin = LoadableTargetOrigin(
        executable_path=sys.executable,
        python_file=__file__,
        attribute="test_repository",
    )
    location_handle = RepositoryLocationHandle.create_python_env_location(
        loadable_target_origin=target_origin,
        location_name="test_location",
        user_process_api=UserProcessApi.CLI,
    )
    location = RepositoryLocation.from_handle(location_handle)
    return location.get_repository("test_repository")
def __init__(self, recon_repo):
    '''Set up an in-process location around a single reconstructable repository.

    Stores the repo, creates an in-process location handle from its pointer,
    builds the external repository from the repo definition, and records it in
    a one-entry name -> external repository mapping.

    Args:
        recon_repo: the ``ReconstructableRepository`` backing this location.
    '''
    self._recon_repo = check.inst_param(recon_repo, 'recon_repo', ReconstructableRepository)
    self._handle = RepositoryLocationHandle.create_in_process_location(recon_repo.pointer)

    definition = recon_repo.get_definition()
    repository_handle = RepositoryHandle(
        repository_name=definition.name,
        repository_location_handle=self._handle,
    )
    self._external_repo = external_repo_from_def(definition, repository_handle)
    self._repositories = {self._external_repo.name: self._external_repo}
def launch_scheduled_runs(
    instance,
    logger,
    end_datetime_utc,
    max_catchup_runs=_DEFAULT_MAX_CATCHUP_RUNS,
    debug_crash_flags=None,
):
    """Launch runs for every stored schedule whose status is ``RUNNING``.

    Each running schedule is processed independently: its repository location
    handle is opened, ``launch_scheduled_runs_for_schedule`` is invoked, and any
    exception is logged so the remaining schedules are still processed.

    Args:
        instance: instance providing the stored schedule state and scheduler.
        logger: logger used for progress and error messages.
        end_datetime_utc: forwarded to ``launch_scheduled_runs_for_schedule``.
        max_catchup_runs: forwarded to ``launch_scheduled_runs_for_schedule``.
        debug_crash_flags: optional mapping of schedule name -> flags; the entry
            for each schedule (or ``None``) is forwarded for that schedule.

    Raises:
        DagsterInvariantViolationError: if the instance's scheduler is not a
            ``DagsterCommandLineScheduler``.
    """
    running_schedules = [
        state
        for state in instance.all_stored_schedule_state()
        if state.status == ScheduleStatus.RUNNING
    ]

    if not isinstance(instance.scheduler, DagsterCommandLineScheduler):
        raise DagsterInvariantViolationError(
            """Your dagster.yaml must be configured as follows in order to use dagster-scheduler: scheduler: module: dagster.core.scheduler class: DagsterCommandLineScheduler """,
        )

    if not running_schedules:
        logger.info("Not checking for any runs since no schedules have been started.")
        return

    running_names = ", ".join([state.name for state in running_schedules])
    logger.info(
        "Checking for new runs for the following schedules: {schedule_names}".format(
            schedule_names=running_names,
        )
    )

    for schedule_state in running_schedules:
        try:
            with RepositoryLocationHandle.create_from_repository_origin(
                schedule_state.origin.repository_origin, instance
            ) as location_handle:
                location = RepositoryLocation.from_handle(location_handle)

                if debug_crash_flags:
                    schedule_crash_flags = debug_crash_flags.get(schedule_state.name)
                else:
                    schedule_crash_flags = None

                launch_scheduled_runs_for_schedule(
                    instance,
                    logger,
                    schedule_state,
                    location,
                    end_datetime_utc,
                    max_catchup_runs,
                    schedule_crash_flags,
                )
        except Exception:  # pylint: disable=broad-except
            # A failure in one schedule is logged and must not stop the others.
            failure = serializable_error_info_from_exc_info(sys.exc_info())
            logger.error(
                "Scheduler failed for {schedule_name} : {error_info}".format(
                    schedule_name=schedule_state.name,
                    error_info=failure.to_string(),
                )
            )
def define_out_of_process_context(python_file, fn_name, instance):
    '''Return a ``DagsterGraphQLContext`` with one out-of-process location.

    Args:
        python_file: path of the file containing the target.
        fn_name: attribute name in ``python_file``; also used as the key of the
            code pointer dict.
        instance: the ``DagsterInstance`` the context should use.

    Returns:
        A context whose only location is a ``PythonEnvRepositoryLocation``
        named 'test_location'.
    '''
    check.inst_param(instance, 'instance', DagsterInstance)

    location_handle = RepositoryLocationHandle.create_out_of_process_location(
        'test_location', {fn_name: FileCodePointer(python_file, fn_name)}
    )
    location = PythonEnvRepositoryLocation(location_handle)
    return DagsterGraphQLContext(locations=[location], instance=instance)