def get_python_origin(self):
    """Build the PipelinePythonOrigin for this pipeline.

    Fetches the repository-level origin from the repository location handle
    and pairs it with this pipeline's name.
    """
    location_handle = self.repository_handle.repository_location_handle
    repo_origin = location_handle.get_repository_python_origin(
        self.repository_handle.repository_name,
    )
    return PipelinePythonOrigin(self.name, repo_origin)
def get_pipeline_python_origin_from_kwargs(kwargs):
    """Resolve a PipelinePythonOrigin from CLI kwargs.

    If ``--pipeline`` was not supplied and the repository contains exactly one
    pipeline, that pipeline is selected implicitly. Otherwise a
    ``click.UsageError`` is raised: either because the choice is ambiguous, or
    because the named pipeline does not exist in the repository.

    Fix: replaced the unidiomatic ``not provided_pipeline_name in ...`` with
    ``provided_pipeline_name not in ...`` (same behavior, idiomatic form).
    """
    repository_origin = get_repository_python_origin_from_kwargs(kwargs)
    provided_pipeline_name = kwargs.get("pipeline")

    recon_repo = recon_repository_from_origin(repository_origin)
    repo_definition = recon_repo.get_definition()

    pipeline_names = set(repo_definition.pipeline_names)

    if provided_pipeline_name is None and len(pipeline_names) == 1:
        # Unambiguous: default to the repository's only pipeline.
        pipeline_name = next(iter(pipeline_names))
    elif provided_pipeline_name is None:
        raise click.UsageError(
            (
                "Must provide --pipeline as there is more than one pipeline "
                "in {repository}. Options are: {pipelines}."
            ).format(
                repository=repo_definition.name, pipelines=_sorted_quoted(pipeline_names)
            )
        )
    elif provided_pipeline_name not in pipeline_names:
        raise click.UsageError(
            (
                'Pipeline "{provided_pipeline_name}" not found in repository "{repository_name}". '
                "Found {found_names} instead."
            ).format(
                provided_pipeline_name=provided_pipeline_name,
                repository_name=repo_definition.name,
                found_names=_sorted_quoted(pipeline_names),
            )
        )
    else:
        pipeline_name = provided_pipeline_name

    return PipelinePythonOrigin(pipeline_name, repository_origin=repository_origin)
def test_reconstruct_from_origin():
    """A pipeline reconstructed from a PipelinePythonOrigin preserves its fields."""
    repo_origin = RepositoryPythonOrigin(
        executable_path="my_python",
        code_pointer=FileCodePointer(
            python_file="foo.py",
            fn_name="bar",
            working_directory="/",
        ),
        container_image="my_image",
        entry_point=DEFAULT_DAGSTER_ENTRY_POINT,
        container_context={"docker": {"registry": "my_reg"}},
    )
    origin = PipelinePythonOrigin(pipeline_name="foo_pipe", repository_origin=repo_origin)

    recon_pipeline = recon_pipeline_from_origin(origin)

    assert recon_pipeline.pipeline_name == origin.pipeline_name

    recon_repo = recon_pipeline.repository
    assert recon_repo.pointer == repo_origin.code_pointer
    assert recon_repo.container_image == repo_origin.container_image
    assert recon_repo.executable_path == repo_origin.executable_path
    assert recon_repo.container_context == repo_origin.container_context
def test_queued_pipeline_origin_check():
    """A QUEUED PipelineRun requires both an external origin and a code origin."""
    pointer = ModuleCodePointer("fake", "fake")
    external_origin = ExternalPipelineOrigin(
        ExternalRepositoryOrigin(
            InProcessRepositoryLocationOrigin(ReconstructableRepository(pointer)),
            "foo_repo",
        ),
        "foo",
    )
    code_origin = PipelinePythonOrigin(
        pipeline_name="foo",
        repository_origin=RepositoryPythonOrigin(sys.executable, pointer),
    )

    # With both origins supplied, construction succeeds.
    PipelineRun(
        status=PipelineRunStatus.QUEUED,
        external_pipeline_origin=external_origin,
        pipeline_code_origin=code_origin,
    )

    # Missing origins fail the check at construction time...
    with pytest.raises(check.CheckError):
        PipelineRun(status=PipelineRunStatus.QUEUED)

    # ...and when transitioning an existing run to QUEUED.
    with pytest.raises(check.CheckError):
        PipelineRun().with_status(PipelineRunStatus.QUEUED)
def launch_run(self, instance, run, external_pipeline):
    """Launch a pipeline run by dispatching a ``launch_run`` Celery task.

    Builds the pipeline's Python origin (from the gRPC location handle when
    the pipeline was loaded from a gRPC server, otherwise via
    ``external_pipeline.get_python_origin()``), serializes ExecuteRunArgs to
    JSON, and sends the task to the ``<location_name>-pipelines`` queue.
    Reports an engine event carrying the Celery task id, then returns the run.
    """
    # Pipelines loaded from a gRPC server resolve their repository origin
    # through the location handle rather than the ExternalPipeline itself.
    if isinstance(
        external_pipeline.get_external_origin().external_repository_origin.repository_location_origin,
        GrpcServerRepositoryLocationOrigin,
    ):
        repository_location_handle = (
            external_pipeline.repository_handle.repository_location_handle
        )
        if not isinstance(repository_location_handle, GrpcServerRepositoryLocationHandle):
            raise DagsterInvariantViolationError(
                "Expected RepositoryLocationHandle to be of type "
                "GrpcServerRepositoryLocationHandle but found type {}".format(
                    type(repository_location_handle)
                )
            )
        repository_name = external_pipeline.repository_handle.repository_name
        location_name = (
            external_pipeline.repository_handle.repository_location_handle.location_name
        )
        pipeline_origin = PipelinePythonOrigin(
            pipeline_name=external_pipeline.name,
            repository_origin=repository_location_handle.get_repository_python_origin(
                repository_name
            ),
        )
    else:
        # Non-gRPC pipelines are routed to the 'local' queue.
        location_name = 'local'
        pipeline_origin = external_pipeline.get_python_origin()

    input_json = serialize_dagster_namedtuple(
        ExecuteRunArgs(
            pipeline_origin=pipeline_origin,
            pipeline_run_id=run.run_id,
            instance_ref=None,
        )
    )

    # The Celery queue name is derived from the repository location name.
    app = self._get_app(location_name)
    sig = app.signature(
        'launch_run', args=(input_json,), queue=f"{location_name}-pipelines"
    )
    result = sig.delay()

    instance.report_engine_event(
        "Started Celery task for pipeline (task id: {result.id}).".format(result=result),
        run,
        EngineEventData(metadata_entries=[
            EventMetadataEntry.text(result.id, "task_id"),
        ]),
    )
    return run
def get_origin(self):
    """
    Hack! Inject the origin that the k8s images will use. The BK image lays the
    code out under /workdir/python_modules/dagster-test/dagster_test/test_project,
    while images inside the kind cluster use /dagster_test/test_project, so the
    normal origin won't work — we substitute this one instead.
    """
    pointer = FileCodePointer(
        "/dagster_test/test_project/test_pipelines/repo.py",
        "define_demo_execution_repo",
    )
    repo_origin = RepositoryPythonOrigin(
        executable_path="python",
        code_pointer=pointer,
    )
    return PipelinePythonOrigin(self._pipeline_index.name, repo_origin)
def get_python_origin(self):
    """
    Hack! Inject the origin that the docker-celery images will use. The BK image
    lays the code out under
    /workdir/python_modules/dagster-test/dagster_test/test_project, unlike the
    test that creates the ReconstructablePipeline, so the normal origin won't
    work — we substitute this one instead.
    """
    pointer = FileCodePointer(
        "/dagster_test/test_project/test_pipelines/repo.py",
        "define_demo_execution_repo",
    )
    repo_origin = RepositoryPythonOrigin(
        executable_path="python",
        code_pointer=pointer,
        container_image=self.repository.container_image,
    )
    return PipelinePythonOrigin(self.pipeline_name, repo_origin)
def get_origin(self):
    """Pair this pipeline's name with its repository's origin."""
    repository_origin = self.repository.get_origin()
    return PipelinePythonOrigin(self.pipeline_name, repository_origin)
def launch_run(self, instance, run, external_pipeline):
    """Launch a run as a Kubernetes Job running
    ``dagster api execute_run_with_structured_logs``.

    Resolves the job image and pipeline origin — from the gRPC server when the
    pipeline was loaded that way (in which case ``job_image`` must not also
    appear in the executor config), otherwise from the executor config (in
    which case ``job_image`` is required). Builds the DagsterK8sJobConfig,
    creates the namespaced Job through the Kubernetes batch API, reports an
    engine event, and returns the run.
    """
    check.inst_param(instance, "instance", DagsterInstance)
    check.inst_param(run, "run", PipelineRun)
    check.inst_param(external_pipeline, "external_pipeline", ExternalPipeline)

    job_name = get_job_name_from_run_id(run.run_id)
    # Pod is named after the job.
    pod_name = job_name
    exc_config = _get_validated_celery_k8s_executor_config(run.run_config)

    job_image = None
    pipeline_origin = None
    env_vars = None
    if isinstance(external_pipeline.get_origin(), PipelineGrpcServerOrigin):
        # gRPC-loaded pipelines supply their own image; a job_image in the
        # executor config would conflict with it.
        if exc_config.get("job_image"):
            raise DagsterInvariantViolationError(
                "Cannot specify job_image in executor config when loading pipeline "
                "from GRPC server."
            )
        repository_location_handle = (
            external_pipeline.repository_handle.repository_location_handle
        )
        if not isinstance(repository_location_handle, GrpcServerRepositoryLocationHandle):
            raise DagsterInvariantViolationError(
                "Expected RepositoryLocationHandle to be of type "
                "GrpcServerRepositoryLocationHandle but found type {}".format(
                    type(repository_location_handle)
                )
            )
        job_image = repository_location_handle.get_current_image()
        # Propagate the resolved image to the run worker via env var.
        env_vars = {"DAGSTER_CURRENT_IMAGE": job_image}
        repository_name = external_pipeline.repository_handle.repository_name
        pipeline_origin = PipelinePythonOrigin(
            pipeline_name=external_pipeline.name,
            repository_origin=repository_location_handle.get_repository_python_origin(
                repository_name
            ),
        )
    else:
        # Non-gRPC: the image must come from the executor config.
        job_image = exc_config.get("job_image")
        if not job_image:
            raise DagsterInvariantViolationError(
                "Cannot find job_image in celery-k8s executor config."
            )
        pipeline_origin = external_pipeline.get_origin()

    job_config = DagsterK8sJobConfig(
        dagster_home=self.dagster_home,
        instance_config_map=self.instance_config_map,
        postgres_password_secret=self.postgres_password_secret,
        job_image=check.str_param(job_image, "job_image"),
        image_pull_policy=exc_config.get("image_pull_policy"),
        image_pull_secrets=exc_config.get("image_pull_secrets"),
        service_account_name=exc_config.get("service_account_name"),
        env_config_maps=exc_config.get("env_config_maps"),
        env_secrets=exc_config.get("env_secrets"),
    )

    user_defined_k8s_config = get_user_defined_k8s_config(frozentags(external_pipeline.tags))

    from dagster.cli.api import ExecuteRunArgs

    input_json = serialize_dagster_namedtuple(
        # depends on DagsterInstance.get() returning the same instance
        # https://github.com/dagster-io/dagster/issues/2757
        ExecuteRunArgs(
            pipeline_origin=pipeline_origin,
            pipeline_run_id=run.run_id,
            instance_ref=None,
        )
    )

    job = construct_dagster_k8s_job(
        job_config,
        command=["dagster"],
        args=["api", "execute_run_with_structured_logs", input_json],
        job_name=job_name,
        pod_name=pod_name,
        component="run_coordinator",
        user_defined_k8s_config=user_defined_k8s_config,
        env_vars=env_vars,
    )

    job_namespace = exc_config.get("job_namespace")

    api = kubernetes.client.BatchV1Api()
    api.create_namespaced_job(body=job, namespace=job_namespace)

    self._instance.report_engine_event(
        "Kubernetes run_coordinator job launched",
        run,
        EngineEventData(
            [
                EventMetadataEntry.text(job_name, "Kubernetes Job name"),
                EventMetadataEntry.text(pod_name, "Kubernetes Pod name"),
                EventMetadataEntry.text(job_namespace, "Kubernetes Namespace"),
                EventMetadataEntry.text(run.run_id, "Run ID"),
            ]
        ),
        cls=self.__class__,
    )
    return run
def launch_run(self, instance, run, external_pipeline):
    """Launch a run as a Kubernetes Job running ``dagster api execute_run``.

    The job image comes either from the user-code deployment's gRPC server (in
    which case a ``job_image`` in executor config is an error) or from the
    executor config (in which case it is required). Creates the namespaced Job,
    reports an engine event, and returns the run.

    Fix: the "you have specified a job_image ... but also ..." error message
    contained ``{job_image_from_executor_config}`` / ``{job_image}``
    placeholders but was never formatted, so users saw the literal braces.
    It is now passed through ``str.format``.
    """
    check.inst_param(instance, "instance", DagsterInstance)
    check.inst_param(run, "run", PipelineRun)
    check.inst_param(external_pipeline, "external_pipeline", ExternalPipeline)

    job_name = get_job_name_from_run_id(run.run_id)
    # Pod is named after the job.
    pod_name = job_name
    exc_config = _get_validated_celery_k8s_executor_config(run.run_config)

    job_image = None
    pipeline_origin = None
    env_vars = None

    job_image_from_executor_config = exc_config.get("job_image")

    # If the user is using user-code deployments, we grab the image from the gRPC server.
    if isinstance(
        external_pipeline.get_external_origin().external_repository_origin.repository_location_origin,
        GrpcServerRepositoryLocationOrigin,
    ):
        repository_location_handle = (
            external_pipeline.repository_handle.repository_location_handle
        )
        if not isinstance(repository_location_handle, GrpcServerRepositoryLocationHandle):
            raise DagsterInvariantViolationError(
                "Expected RepositoryLocationHandle to be of type "
                "GrpcServerRepositoryLocationHandle but found type {}".format(
                    type(repository_location_handle)
                )
            )

        repository_name = external_pipeline.repository_handle.repository_name
        # Reload so the origin reflects the server's current state.
        repository_origin = repository_location_handle.reload_repository_python_origin(
            repository_name
        )
        pipeline_origin = PipelinePythonOrigin(
            pipeline_name=external_pipeline.name, repository_origin=repository_origin
        )

        job_image = repository_origin.container_image
        # Propagate the resolved image to the run worker via env var.
        env_vars = {"DAGSTER_CURRENT_IMAGE": job_image}

        if job_image_from_executor_config:
            raise DagsterInvariantViolationError(
                (
                    "You have specified a job_image {job_image_from_executor_config} in your executor configuration, "
                    "but also {job_image} in your user-code deployment. You cannot specify a job_image "
                    "in your executor config when using user-code deployments because the job image is "
                    "pulled from the deployment. To resolve this error, remove the job_image "
                    "configuration from your executor configuration (which is a part of your run configuration)"
                ).format(
                    job_image_from_executor_config=job_image_from_executor_config,
                    job_image=job_image,
                )
            )
    else:
        if not job_image_from_executor_config:
            raise DagsterInvariantViolationError(
                "You have not specified a job_image in your executor configuration. "
                "To resolve this error, specify the job_image configuration in the executor "
                "config section in your run config. \n"
                "Note: You may also be seeing this error because you are using the configured API. "
                "Using configured with the celery-k8s executor is not supported at this time, "
                "and the job_image must be configured at the top-level executor config without "
                "using configured."
            )

        job_image = job_image_from_executor_config
        pipeline_origin = external_pipeline.get_python_origin()

    job_config = DagsterK8sJobConfig(
        dagster_home=self.dagster_home,
        instance_config_map=self.instance_config_map,
        postgres_password_secret=self.postgres_password_secret,
        job_image=check.str_param(job_image, "job_image"),
        image_pull_policy=exc_config.get("image_pull_policy"),
        image_pull_secrets=exc_config.get("image_pull_secrets"),
        service_account_name=exc_config.get("service_account_name"),
        env_config_maps=exc_config.get("env_config_maps"),
        env_secrets=exc_config.get("env_secrets"),
    )

    user_defined_k8s_config = get_user_defined_k8s_config(frozentags(run.tags))

    from dagster.cli.api import ExecuteRunArgs

    input_json = serialize_dagster_namedtuple(
        # depends on DagsterInstance.get() returning the same instance
        # https://github.com/dagster-io/dagster/issues/2757
        ExecuteRunArgs(
            pipeline_origin=pipeline_origin,
            pipeline_run_id=run.run_id,
            instance_ref=None,
        )
    )

    job = construct_dagster_k8s_job(
        job_config,
        args=["dagster", "api", "execute_run", input_json],
        job_name=job_name,
        pod_name=pod_name,
        component="run_coordinator",
        user_defined_k8s_config=user_defined_k8s_config,
        env_vars=env_vars,
    )

    job_namespace = exc_config.get("job_namespace")

    self._batch_api.create_namespaced_job(body=job, namespace=job_namespace)
    self._instance.report_engine_event(
        "Kubernetes run_coordinator job launched",
        run,
        EngineEventData(
            [
                EventMetadataEntry.text(job_name, "Kubernetes Job name"),
                EventMetadataEntry.text(job_namespace, "Kubernetes Namespace"),
                EventMetadataEntry.text(run.run_id, "Run ID"),
            ]
        ),
        cls=self.__class__,
    )
    return run
def launch_run(self, instance, run, external_pipeline):
    """Launch a run as a Kubernetes Job running ``dagster api execute_run``.

    For pipelines loaded from a gRPC server, the repository origin (and its
    container image) are reloaded from the server and the launcher's
    ``job_image`` config must not be set; otherwise the launcher's static job
    config is used. Creates the namespaced Job, reports an engine event, and
    returns the run.
    """
    check.inst_param(run, "run", PipelineRun)
    check.inst_param(external_pipeline, "external_pipeline", ExternalPipeline)

    job_name = "dagster-run-{}".format(run.run_id)
    # Pod is named after the job.
    pod_name = job_name

    user_defined_k8s_config = get_user_defined_k8s_config(frozentags(run.tags))

    pipeline_origin = None
    job_config = None
    if isinstance(
        external_pipeline.get_external_origin().external_repository_origin.repository_location_origin,
        GrpcServerRepositoryLocationOrigin,
    ):
        # gRPC-loaded pipelines supply their own image; a configured job_image
        # would conflict with it.
        if self._job_image:
            raise DagsterInvariantViolationError(
                "Cannot specify job_image in run launcher config when loading pipeline "
                "from GRPC server."
            )
        repository_location_handle = (
            external_pipeline.repository_handle.repository_location_handle
        )
        if not isinstance(repository_location_handle, GrpcServerRepositoryLocationHandle):
            raise DagsterInvariantViolationError(
                "Expected RepositoryLocationHandle to be of type "
                "GrpcServerRepositoryLocationHandle but found type {}".format(
                    type(repository_location_handle)
                )
            )
        repository_name = external_pipeline.repository_handle.repository_name
        # Reload so the origin (and its container image) reflect the server's
        # current state — presumably to pick up redeployed code; confirm.
        repository_origin = repository_location_handle.reload_repository_python_origin(
            repository_name
        )
        job_image = repository_origin.container_image
        pipeline_origin = PipelinePythonOrigin(
            pipeline_name=external_pipeline.name, repository_origin=repository_origin
        )
        job_config = self._get_grpc_job_config(job_image)
    else:
        pipeline_origin = external_pipeline.get_python_origin()
        job_config = self._get_static_job_config()

    input_json = serialize_dagster_namedtuple(
        ExecuteRunArgs(
            pipeline_origin=pipeline_origin,
            pipeline_run_id=run.run_id,
            instance_ref=None,
        )
    )

    job = construct_dagster_k8s_job(
        job_config=job_config,
        args=["dagster", "api", "execute_run", input_json],
        job_name=job_name,
        pod_name=pod_name,
        component="run_coordinator",
        user_defined_k8s_config=user_defined_k8s_config,
    )

    self._batch_api.create_namespaced_job(body=job, namespace=self.job_namespace)
    self._instance.report_engine_event(
        "Kubernetes run worker job launched",
        run,
        EngineEventData(
            [
                EventMetadataEntry.text(job_name, "Kubernetes Job name"),
                EventMetadataEntry.text(self.job_namespace, "Kubernetes Namespace"),
                EventMetadataEntry.text(run.run_id, "Run ID"),
            ]
        ),
        cls=self.__class__,
    )
    return run
def launch_run(self, instance, run, external_pipeline):
    """Launch a run as a Kubernetes Job running
    ``dagster api execute_run_with_structured_logs``.

    Resolves the job image and pipeline origin — from the gRPC server when the
    pipeline was loaded that way (in which case ``job_image`` must not also
    appear in the executor config), otherwise from the executor config (in
    which case ``job_image`` is required). Creates the namespaced Job through
    the Kubernetes batch API, reports an engine event, and returns the run.
    """
    check.inst_param(run, 'run', PipelineRun)

    job_name = get_job_name_from_run_id(run.run_id)
    # Pod is named after the job.
    pod_name = job_name
    exc_config = _get_validated_celery_k8s_executor_config(run.run_config)

    job_image = None
    pipeline_origin = None
    env_vars = None
    if isinstance(external_pipeline.get_origin(), PipelineGrpcServerOrigin):
        # gRPC-loaded pipelines supply their own image; a job_image in the
        # executor config would conflict with it.
        if exc_config.get('job_image'):
            raise DagsterInvariantViolationError(
                'Cannot specify job_image in executor config when loading pipeline '
                'from GRPC server.'
            )
        repository_location_handle = (
            external_pipeline.repository_handle.repository_location_handle
        )
        if not isinstance(repository_location_handle, GrpcServerRepositoryLocationHandle):
            raise DagsterInvariantViolationError(
                'Expected RepositoryLocationHandle to be of type '
                'GrpcServerRepositoryLocationHandle but found type {}'.format(
                    type(repository_location_handle)
                )
            )
        job_image = repository_location_handle.get_current_image()
        # Propagate the resolved image to the run worker via env var.
        env_vars = {'DAGSTER_CURRENT_IMAGE': job_image}
        repository_name = external_pipeline.repository_handle.repository_name
        pipeline_origin = PipelinePythonOrigin(
            pipeline_name=external_pipeline.name,
            repository_origin=repository_location_handle.get_repository_python_origin(
                repository_name
            ),
        )
    else:
        # Non-gRPC: the image must come from the executor config.
        job_image = exc_config.get('job_image')
        if not job_image:
            raise DagsterInvariantViolationError(
                'Cannot find job_image in celery-k8s executor config.'
            )
        pipeline_origin = external_pipeline.get_origin()

    job_config = DagsterK8sJobConfig(
        dagster_home=self.dagster_home,
        instance_config_map=self.instance_config_map,
        postgres_password_secret=self.postgres_password_secret,
        job_image=check.str_param(job_image, 'job_image'),
        image_pull_policy=exc_config.get('image_pull_policy'),
        image_pull_secrets=exc_config.get('image_pull_secrets'),
        service_account_name=exc_config.get('service_account_name'),
        env_config_maps=exc_config.get('env_config_maps'),
        env_secrets=exc_config.get('env_secrets'),
    )

    resources = get_k8s_resource_requirements(
        frozentags(external_pipeline.tags)
    )

    from dagster.cli.api import ExecuteRunArgs

    input_json = serialize_dagster_namedtuple(
        # depends on DagsterInstance.get() returning the same instance
        # https://github.com/dagster-io/dagster/issues/2757
        ExecuteRunArgs(
            pipeline_origin=pipeline_origin,
            pipeline_run_id=run.run_id,
            instance_ref=None,
        )
    )

    job = construct_dagster_k8s_job(
        job_config,
        command=['dagster'],
        args=['api', 'execute_run_with_structured_logs', input_json],
        job_name=job_name,
        pod_name=pod_name,
        component='run_coordinator',
        resources=resources,
        env_vars=env_vars,
    )

    job_namespace = exc_config.get('job_namespace')

    api = kubernetes.client.BatchV1Api()
    api.create_namespaced_job(body=job, namespace=job_namespace)

    self._instance.report_engine_event(
        'Kubernetes run_coordinator job launched',
        run,
        EngineEventData([
            EventMetadataEntry.text(job_name, 'Kubernetes Job name'),
            EventMetadataEntry.text(pod_name, 'Kubernetes Pod name'),
            EventMetadataEntry.text(job_namespace, 'Kubernetes Namespace'),
            EventMetadataEntry.text(run.run_id, 'Run ID'),
        ]),
        cls=CeleryK8sRunLauncher,
    )
    return run
def get_python_origin(self):
    """Compose this pipeline's Python origin from its repository handle's origin."""
    repo_origin = self.repository_handle.get_python_origin()
    return PipelinePythonOrigin(self.name, repo_origin)
def launch_run(self, instance, run, external_pipeline):
    """Launch a run as a Kubernetes 'runmaster' Job running
    ``dagster api execute_run_with_structured_logs``.

    For pipelines loaded from a gRPC server, the job image is taken from the
    server (and ``job_image`` must not also appear in the launcher config);
    otherwise the launcher's static job config is used. Creates the namespaced
    Job, reports an engine event, and returns the run.
    """
    check.inst_param(run, 'run', PipelineRun)
    check.inst_param(external_pipeline, 'external_pipeline', ExternalPipeline)

    job_name = 'dagster-run-{}'.format(run.run_id)
    # Pod is named after the job.
    pod_name = job_name

    resources = get_k8s_resource_requirements(frozentags(external_pipeline.tags))

    pipeline_origin = None
    job_config = None
    if isinstance(external_pipeline.get_origin(), PipelineGrpcServerOrigin):
        # gRPC-loaded pipelines supply their own image; a configured job_image
        # would conflict with it.
        if self._job_image:
            raise DagsterInvariantViolationError(
                'Cannot specify job_image in run launcher config when loading pipeline '
                'from GRPC server.'
            )
        repository_location_handle = (
            external_pipeline.repository_handle.repository_location_handle
        )
        if not isinstance(repository_location_handle, GrpcServerRepositoryLocationHandle):
            raise DagsterInvariantViolationError(
                'Expected RepositoryLocationHandle to be of type '
                'GrpcServerRepositoryLocationHandle but found type {}'.format(
                    type(repository_location_handle)
                )
            )
        job_image = repository_location_handle.get_current_image()
        job_config = self._get_grpc_job_config(job_image)
        repository_name = external_pipeline.repository_handle.repository_name
        pipeline_origin = PipelinePythonOrigin(
            pipeline_name=external_pipeline.name,
            repository_origin=repository_location_handle.get_repository_python_origin(
                repository_name
            ),
        )
    else:
        pipeline_origin = external_pipeline.get_origin()
        job_config = self._get_static_job_config()

    input_json = serialize_dagster_namedtuple(
        ExecuteRunArgs(
            pipeline_origin=pipeline_origin,
            pipeline_run_id=run.run_id,
            instance_ref=None,
        )
    )

    job = construct_dagster_k8s_job(
        job_config=job_config,
        command=['dagster'],
        args=['api', 'execute_run_with_structured_logs', input_json],
        job_name=job_name,
        pod_name=pod_name,
        component='runmaster',
        resources=resources,
    )

    self._batch_api.create_namespaced_job(body=job, namespace=self.job_namespace)
    self._instance.report_engine_event(
        'Kubernetes runmaster job launched',
        run,
        EngineEventData(
            [
                EventMetadataEntry.text(job_name, 'Kubernetes Job name'),
                EventMetadataEntry.text(pod_name, 'Kubernetes Pod name'),
                EventMetadataEntry.text(self.job_namespace, 'Kubernetes Namespace'),
                EventMetadataEntry.text(run.run_id, 'Run ID'),
            ]
        ),
        cls=K8sRunLauncher,
    )
    return run