def test_resource_tags():
    @solid(
        tags={
            K8S_RESOURCE_REQUIREMENTS_KEY: {
                'requests': {'cpu': '250m', 'memory': '64Mi'},
                'limits': {'cpu': '500m', 'memory': '2560Mi'},
            }
        }
    )
    def resource_tags_solid(_):
        pass

    resources = get_k8s_resource_requirements(resource_tags_solid.tags)

    # The parsed requirements must survive a JSON round-trip unchanged,
    # i.e. they are plain JSON-serializable data.
    assert resources == json.loads(json.dumps(resources))
    assert resources['requests']['cpu'] == '250m'
    assert resources['requests']['memory'] == '64Mi'
    assert resources['limits']['cpu'] == '500m'
    assert resources['limits']['memory'] == '2560Mi'

    @solid
    def no_resource_tags_solid(_):
        pass

    # A solid without the resource-requirements tag should yield None.
    no_resources = get_k8s_resource_requirements(no_resource_tags_solid.tags)
    assert no_resources is None
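# A minimal sketch of the helper under test, assuming tag values are stored as
# JSON strings (dagster serializes non-string tag values) and that a missing
# tag yields None. This is an illustrative reconstruction, not the library
# source; the _sketch suffix marks the name as hypothetical.
import json

def get_k8s_resource_requirements_sketch(tags):
    if K8S_RESOURCE_REQUIREMENTS_KEY not in tags:
        # No resource-requirements tag on the solid: nothing to request.
        return None
    value = tags[K8S_RESOURCE_REQUIREMENTS_KEY]
    # Decode the JSON-encoded tag value back into a plain dict if needed.
    return json.loads(value) if isinstance(value, str) else value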
def _submit_task_k8s_job(app, pipeline_context, step, queue, priority):
    resources = get_k8s_resource_requirements(step.tags)
    task = create_k8s_job_task(app)

    recon_repo = pipeline_context.pipeline.get_reconstructable_repository()

    task_signature = task.si(
        instance_ref_dict=pipeline_context.instance.get_ref().to_dict(),
        step_keys=[step.key],
        run_config=pipeline_context.pipeline_run.run_config,
        mode=pipeline_context.pipeline_run.mode,
        repo_name=recon_repo.get_definition().name,
        repo_location_name=pipeline_context.executor.repo_location_name,
        run_id=pipeline_context.pipeline_run.run_id,
        job_config_dict=pipeline_context.executor.job_config.to_dict(),
        job_namespace=pipeline_context.executor.job_namespace,
        resources=resources,
        retries_dict=pipeline_context.executor.retries.for_inner_plan().to_config(),
        load_incluster_config=pipeline_context.executor.load_incluster_config,
        kubeconfig_file=pipeline_context.executor.kubeconfig_file,
    )
    return task_signature.apply_async(
        priority=priority,
        queue=queue,
        routing_key='{queue}.execute_step_k8s_job'.format(queue=queue),
    )
def _submit_task_k8s_job(app, pipeline_context, step, queue, priority):
    from .tasks import create_k8s_job_task

    resources = get_k8s_resource_requirements(step.tags)
    task = create_k8s_job_task(app)

    task_signature = task.si(
        instance_ref_dict=pipeline_context.instance.get_ref().to_dict(),
        step_keys=[step.key],
        environment_dict=pipeline_context.pipeline_run.environment_dict,
        mode=pipeline_context.pipeline_run.mode,
        pipeline_name=pipeline_context.pipeline_run.pipeline_name,
        run_id=pipeline_context.pipeline_run.run_id,
        job_config_dict=pipeline_context.executor_config.job_config.to_dict(),
        job_namespace=pipeline_context.executor_config.job_namespace,
        resources=resources,
        load_incluster_config=pipeline_context.executor_config.load_incluster_config,
        kubeconfig_file=pipeline_context.executor_config.kubeconfig_file,
    )
    return task_signature.apply_async(
        priority=priority,
        queue=queue,
        routing_key='{queue}.execute_step_k8s_job'.format(queue=queue),
    )
def test_valid_job_format_with_resources(run_launcher):
    docker_image = test_project_docker_image()

    run_config = load_yaml_from_path(os.path.join(test_project_environments_path(), 'env.yaml'))
    pipeline_name = 'demo_pipeline'
    run = PipelineRun(pipeline_name=pipeline_name, run_config=run_config)

    tags = validate_tags(
        {
            K8S_RESOURCE_REQUIREMENTS_KEY: {
                'requests': {'cpu': '250m', 'memory': '64Mi'},
                'limits': {'cpu': '500m', 'memory': '2560Mi'},
            }
        }
    )
    resources = get_k8s_resource_requirements(tags)
    job_name = 'dagster-run-%s' % run.run_id
    pod_name = 'dagster-run-%s' % run.run_id
    job = construct_dagster_k8s_job(
        job_config=run_launcher.job_config,
        command=['dagster-graphql'],
        args=['-p', 'executeRunInProcess', '-v', seven.json.dumps({'runId': run.run_id})],
        job_name=job_name,
        resources=resources,
        pod_name=pod_name,
        component='runmaster',
    )

    assert (
        yaml.dump(remove_none_recursively(job.to_dict()), default_flow_style=False).strip()
        == EXPECTED_JOB_SPEC.format(
            run_id=run.run_id,
            job_image=docker_image,
            image_pull_policy=image_pull_policy(),
            dagster_version=dagster_version,
            resources='''
        resources:
          limits:
            cpu: 500m
            memory: 2560Mi
          requests:
            cpu: 250m
            memory: 64Mi''',
        ).strip()
    )
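# A sketch of the remove_none_recursively helper used above: it prunes
# None-valued entries from the serialized Job dict so the YAML dump contains
# only fields that were actually set. Hypothetical reconstruction (the _sketch
# suffix marks the name as illustrative, not the library source).
def remove_none_recursively_sketch(obj):
    if isinstance(obj, list):
        return [remove_none_recursively_sketch(item) for item in obj if item is not None]
    if isinstance(obj, dict):
        return {
            key: remove_none_recursively_sketch(value)
            for key, value in obj.items()
            if value is not None
        }
    return obj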
def test_resource_tags():
    @solid(
        tags={
            K8S_RESOURCE_REQUIREMENTS_KEY: {
                'requests': {'cpu': '250m', 'memory': '64Mi'},
                'limits': {'cpu': '500m', 'memory': '2560Mi'},
            }
        }
    )
    def boop(_):
        pass

    resources = get_k8s_resource_requirements(boop.tags)

    assert resources.requests['cpu'] == '250m'
    assert resources.requests['memory'] == '64Mi'
    assert resources.limits['cpu'] == '500m'
    assert resources.limits['memory'] == '2560Mi'
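# Note that this variant asserts attribute access (resources.requests) rather
# than dict access, suggesting the helper here returns a kubernetes client
# model instead of a plain dict. A sketch of wrapping the parsed tag dict
# accordingly -- an assumption, not the library's actual conversion:
import kubernetes

def to_v1_resource_requirements(resource_dict):
    # V1ResourceRequirements exposes .requests and .limits as attributes,
    # matching the assertions in the test above.
    return kubernetes.client.V1ResourceRequirements(
        requests=resource_dict.get('requests'),
        limits=resource_dict.get('limits'),
    )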
def launch_run(self, instance, run, external_pipeline):
    check.inst_param(run, 'run', PipelineRun)

    job_name = get_job_name_from_run_id(run.run_id)
    pod_name = job_name
    exc_config = _get_validated_celery_k8s_executor_config(run.run_config)

    job_config = DagsterK8sJobConfig(
        dagster_home=self.dagster_home,
        instance_config_map=self.instance_config_map,
        postgres_password_secret=self.postgres_password_secret,
        job_image=exc_config.get('job_image'),
        image_pull_policy=exc_config.get('image_pull_policy'),
        image_pull_secrets=exc_config.get('image_pull_secrets'),
        service_account_name=exc_config.get('service_account_name'),
        env_config_maps=exc_config.get('env_config_maps'),
        env_secrets=exc_config.get('env_secrets'),
    )

    resources = get_k8s_resource_requirements(frozentags(external_pipeline.tags))

    job = construct_dagster_graphql_k8s_job(
        job_config,
        args=[
            '-p',
            'executeRunInProcess',
            '-v',
            seven.json.dumps(
                {
                    'runId': run.run_id,
                    'repositoryName': external_pipeline.handle.repository_name,
                    'repositoryLocationName': external_pipeline.handle.location_name,
                }
            ),
            '--remap-sigterm',
        ],
        job_name=job_name,
        pod_name=pod_name,
        component='runmaster',
        resources=resources,
    )

    job_namespace = exc_config.get('job_namespace')

    api = kubernetes.client.BatchV1Api()
    api.create_namespaced_job(body=job, namespace=job_namespace)

    self._instance.report_engine_event(
        'Kubernetes runmaster job launched',
        run,
        EngineEventData(
            [
                EventMetadataEntry.text(job_name, 'Kubernetes Job name'),
                EventMetadataEntry.text(pod_name, 'Kubernetes Pod name'),
                EventMetadataEntry.text(job_namespace, 'Kubernetes Namespace'),
                EventMetadataEntry.text(run.run_id, 'Run ID'),
            ]
        ),
        cls=CeleryK8sRunLauncher,
    )
    return run
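# get_job_name_from_run_id names both the Job and the Pod; the test above
# builds names as 'dagster-run-%s' % run.run_id, so a one-line sketch
# consistent with that convention (an assumption, not the library source):
def get_job_name_from_run_id_sketch(run_id):
    return 'dagster-run-{}'.format(run_id)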
def launch_run(self, instance, run, external_pipeline):
    check.inst_param(run, 'run', PipelineRun)

    job_name = get_job_name_from_run_id(run.run_id)
    pod_name = job_name
    exc_config = _get_validated_celery_k8s_executor_config(run.run_config)

    job_image = None
    pipeline_origin = None
    env_vars = None
    if isinstance(external_pipeline.get_origin(), PipelineGrpcServerOrigin):
        if exc_config.get('job_image'):
            raise DagsterInvariantViolationError(
                'Cannot specify job_image in executor config when loading pipeline '
                'from GRPC server.'
            )

        repository_location_handle = (
            external_pipeline.repository_handle.repository_location_handle
        )

        if not isinstance(repository_location_handle, GrpcServerRepositoryLocationHandle):
            raise DagsterInvariantViolationError(
                'Expected RepositoryLocationHandle to be of type '
                'GrpcServerRepositoryLocationHandle but found type {}'.format(
                    type(repository_location_handle)
                )
            )

        job_image = repository_location_handle.get_current_image()
        env_vars = {'DAGSTER_CURRENT_IMAGE': job_image}

        repository_name = external_pipeline.repository_handle.repository_name
        pipeline_origin = PipelinePythonOrigin(
            pipeline_name=external_pipeline.name,
            repository_origin=repository_location_handle.get_repository_python_origin(
                repository_name
            ),
        )
    else:
        job_image = exc_config.get('job_image')
        if not job_image:
            raise DagsterInvariantViolationError(
                'Cannot find job_image in celery-k8s executor config.'
            )
        pipeline_origin = external_pipeline.get_origin()

    job_config = DagsterK8sJobConfig(
        dagster_home=self.dagster_home,
        instance_config_map=self.instance_config_map,
        postgres_password_secret=self.postgres_password_secret,
        job_image=check.str_param(job_image, 'job_image'),
        image_pull_policy=exc_config.get('image_pull_policy'),
        image_pull_secrets=exc_config.get('image_pull_secrets'),
        service_account_name=exc_config.get('service_account_name'),
        env_config_maps=exc_config.get('env_config_maps'),
        env_secrets=exc_config.get('env_secrets'),
    )

    resources = get_k8s_resource_requirements(frozentags(external_pipeline.tags))

    from dagster.cli.api import ExecuteRunArgs

    input_json = serialize_dagster_namedtuple(
        # depends on DagsterInstance.get() returning the same instance
        # https://github.com/dagster-io/dagster/issues/2757
        ExecuteRunArgs(
            pipeline_origin=pipeline_origin,
            pipeline_run_id=run.run_id,
            instance_ref=None,
        )
    )

    job = construct_dagster_k8s_job(
        job_config,
        command=['dagster'],
        args=['api', 'execute_run_with_structured_logs', input_json],
        job_name=job_name,
        pod_name=pod_name,
        component='run_coordinator',
        resources=resources,
        env_vars=env_vars,
    )

    job_namespace = exc_config.get('job_namespace')

    api = kubernetes.client.BatchV1Api()
    api.create_namespaced_job(body=job, namespace=job_namespace)

    self._instance.report_engine_event(
        'Kubernetes run_coordinator job launched',
        run,
        EngineEventData(
            [
                EventMetadataEntry.text(job_name, 'Kubernetes Job name'),
                EventMetadataEntry.text(pod_name, 'Kubernetes Pod name'),
                EventMetadataEntry.text(job_namespace, 'Kubernetes Namespace'),
                EventMetadataEntry.text(run.run_id, 'Run ID'),
            ]
        ),
        cls=CeleryK8sRunLauncher,
    )
    return run
def launch_run(self, instance, run, external_pipeline):
    check.inst_param(run, 'run', PipelineRun)

    job_name = get_job_name_from_run_id(run.run_id)
    pod_name = job_name
    exc_config = _get_validated_celery_k8s_executor_config(run.run_config)

    job_config = DagsterK8sJobConfig(
        dagster_home=self.dagster_home,
        instance_config_map=self.instance_config_map,
        postgres_password_secret=self.postgres_password_secret,
        job_image=exc_config.get('job_image'),
        image_pull_policy=exc_config.get('image_pull_policy'),
        image_pull_secrets=exc_config.get('image_pull_secrets'),
        service_account_name=exc_config.get('service_account_name'),
        env_config_maps=exc_config.get('env_config_maps'),
        env_secrets=exc_config.get('env_secrets'),
    )

    resources = get_k8s_resource_requirements(frozentags(external_pipeline.tags))

    from dagster.cli.api import ExecuteRunArgs

    input_json = serialize_dagster_namedtuple(
        # depends on DagsterInstance.get() returning the same instance
        # https://github.com/dagster-io/dagster/issues/2757
        ExecuteRunArgs(
            pipeline_origin=external_pipeline.get_origin(),
            pipeline_run_id=run.run_id,
            instance_ref=None,
        )
    )

    job = construct_dagster_k8s_job(
        job_config,
        command=['dagster'],
        args=['api', 'execute_run_with_structured_logs', input_json],
        job_name=job_name,
        pod_name=pod_name,
        component='runmaster',
        resources=resources,
    )

    job_namespace = exc_config.get('job_namespace')

    api = kubernetes.client.BatchV1Api()
    api.create_namespaced_job(body=job, namespace=job_namespace)

    self._instance.report_engine_event(
        'Kubernetes runmaster job launched',
        run,
        EngineEventData(
            [
                EventMetadataEntry.text(job_name, 'Kubernetes Job name'),
                EventMetadataEntry.text(pod_name, 'Kubernetes Pod name'),
                EventMetadataEntry.text(job_namespace, 'Kubernetes Namespace'),
                EventMetadataEntry.text(run.run_id, 'Run ID'),
            ]
        ),
        cls=CeleryK8sRunLauncher,
    )
    return run
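# For reference, a run_config shape that _get_validated_celery_k8s_executor_config
# plausibly accepts -- the field names mirror the exc_config.get(...) calls above,
# but the exact schema is an assumption:
#
# execution:
#   celery-k8s:
#     config:
#       job_image: 'my_repo.com/image_name:latest'
#       job_namespace: 'dagster'
#       image_pull_policy: 'IfNotPresent'
#       service_account_name: 'dagster'
#       env_config_maps:
#         - 'my-config-map'
#       env_secrets:
#         - 'my-secret'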