def get_papermill_parameters(compute_context, inputs, output_log_path):
    check.inst_param(compute_context, 'compute_context', SystemComputeExecutionContext)
    check.param_invariant(
        isinstance(compute_context.environment_dict, dict),
        'compute_context',
        'SystemComputeExecutionContext must have valid environment_dict',
    )
    check.dict_param(inputs, 'inputs', key_type=six.string_types)

    run_id = compute_context.run_id
    marshal_dir = '/tmp/dagstermill/{run_id}/marshal'.format(run_id=run_id)
    mkdir_p(marshal_dir)

    (handle, solid_subset) = ExecutionTargetHandle.get_handle(compute_context.pipeline_def)

    if not handle:
        raise DagstermillError(
            'Can\'t execute a dagstermill solid from a pipeline that wasn\'t instantiated using '
            'an ExecutionTargetHandle'
        )

    dm_handle_kwargs = handle.data._asdict()
    dm_handle_kwargs['pipeline_name'] = compute_context.pipeline_def.name

    dm_context_dict = {
        'output_log_path': output_log_path,
        'marshal_dir': marshal_dir,
        'environment_dict': compute_context.environment_dict,
    }

    dm_solid_handle_kwargs = compute_context.solid_handle._asdict()

    parameters = {}

    input_def_dict = compute_context.solid_def.input_dict
    for input_name, input_value in inputs.items():
        assert (
            input_name not in RESERVED_INPUT_NAMES
        ), 'Dagstermill solids cannot have inputs named {input_name}'.format(input_name=input_name)
        dagster_type = input_def_dict[input_name].dagster_type
        parameter_value = write_value(
            dagster_type, input_value, os.path.join(marshal_dir, 'input-{}'.format(input_name))
        )
        parameters[input_name] = parameter_value

    parameters['__dm_context'] = dm_context_dict
    parameters['__dm_handle_kwargs'] = dm_handle_kwargs
    parameters['__dm_pipeline_run_dict'] = pack_value(compute_context.pipeline_run)
    parameters['__dm_solid_handle_kwargs'] = dm_solid_handle_kwargs
    parameters['__dm_solid_subset'] = solid_subset
    parameters['__dm_instance_ref_dict'] = pack_value(compute_context.instance.get_ref())

    return parameters
def get_papermill_parameters(step_context, inputs, output_log_path):
    check.inst_param(step_context, "step_context", StepExecutionContext)
    check.param_invariant(
        isinstance(step_context.run_config, dict),
        "step_context",
        "StepExecutionContext must have valid run_config",
    )
    check.dict_param(inputs, "inputs", key_type=str)

    run_id = step_context.run_id
    temp_dir = get_system_temp_directory()
    marshal_dir = os.path.normpath(os.path.join(temp_dir, "dagstermill", str(run_id), "marshal"))
    mkdir_p(marshal_dir)

    if not isinstance(step_context.pipeline, ReconstructablePipeline):
        raise DagstermillError(
            "Can't execute a dagstermill solid from a pipeline that is not reconstructable. "
            "Use the reconstructable() function if executing from python"
        )

    dm_executable_dict = step_context.pipeline.to_dict()

    dm_context_dict = {
        "output_log_path": output_log_path,
        "marshal_dir": marshal_dir,
        "run_config": step_context.run_config,
    }

    dm_solid_handle_kwargs = step_context.solid_handle._asdict()

    parameters = {}

    input_def_dict = step_context.solid_def.input_dict
    for input_name, input_value in inputs.items():
        assert (
            input_name not in RESERVED_INPUT_NAMES
        ), "Dagstermill solids cannot have inputs named {input_name}".format(input_name=input_name)
        dagster_type = input_def_dict[input_name].dagster_type
        parameter_value = write_value(
            dagster_type,
            input_value,
            os.path.join(marshal_dir, f"{str(step_context.solid_handle)}-input-{input_name}"),
        )
        parameters[input_name] = parameter_value

    parameters["__dm_context"] = dm_context_dict
    parameters["__dm_executable_dict"] = dm_executable_dict
    parameters["__dm_pipeline_run_dict"] = pack_value(step_context.pipeline_run)
    parameters["__dm_solid_handle_kwargs"] = dm_solid_handle_kwargs
    parameters["__dm_instance_ref_dict"] = pack_value(step_context.instance.get_ref())

    return parameters
def get_papermill_parameters(compute_context, inputs, output_log_path): check.inst_param(compute_context, "compute_context", SystemComputeExecutionContext) check.param_invariant( isinstance(compute_context.run_config, dict), "compute_context", "SystemComputeExecutionContext must have valid run_config", ) check.dict_param(inputs, "inputs", key_type=six.string_types) run_id = compute_context.run_id marshal_dir = "/tmp/dagstermill/{run_id}/marshal".format(run_id=run_id) mkdir_p(marshal_dir) if not isinstance(compute_context.pipeline, ReconstructablePipeline): raise DagstermillError( "Can't execute a dagstermill solid from a pipeline that is not reconstructable. " "Use the reconstructable() function if executing from python") dm_executable_dict = compute_context.pipeline.to_dict() dm_context_dict = { "output_log_path": output_log_path, "marshal_dir": marshal_dir, "run_config": compute_context.run_config, } dm_solid_handle_kwargs = compute_context.solid_handle._asdict() parameters = {} input_def_dict = compute_context.solid_def.input_dict for input_name, input_value in inputs.items(): assert ( input_name not in RESERVED_INPUT_NAMES ), "Dagstermill solids cannot have inputs named {input_name}".format( input_name=input_name) dagster_type = input_def_dict[input_name].dagster_type parameter_value = write_value( dagster_type, input_value, os.path.join(marshal_dir, "input-{}".format(input_name))) parameters[input_name] = parameter_value parameters["__dm_context"] = dm_context_dict parameters["__dm_executable_dict"] = dm_executable_dict parameters["__dm_pipeline_run_dict"] = pack_value( compute_context.pipeline_run) parameters["__dm_solid_handle_kwargs"] = dm_solid_handle_kwargs parameters["__dm_instance_ref_dict"] = pack_value( compute_context.instance.get_ref()) return parameters
def get_papermill_parameters(step_context, inputs, output_log_path, compute_descriptor):
    check.inst_param(step_context, "step_context", StepExecutionContext)
    check.param_invariant(
        isinstance(step_context.run_config, dict),
        "step_context",
        "StepExecutionContext must have valid run_config",
    )
    check.dict_param(inputs, "inputs", key_type=str)

    run_id = step_context.run_id
    temp_dir = get_system_temp_directory()
    marshal_dir = os.path.normpath(os.path.join(temp_dir, "dagstermill", str(run_id), "marshal"))
    mkdir_p(marshal_dir)

    if not isinstance(step_context.pipeline, ReconstructablePipeline):
        if compute_descriptor == "solid":
            raise DagstermillError(
                "Can't execute a dagstermill solid from a pipeline that is not reconstructable. "
                "Use the reconstructable() function if executing from python"
            )
        else:
            raise DagstermillError(
                "Can't execute a dagstermill op from a job that is not reconstructable. "
                "Use the reconstructable() function if executing from python"
            )

    dm_executable_dict = step_context.pipeline.to_dict()

    dm_context_dict = {
        "output_log_path": output_log_path,
        "marshal_dir": marshal_dir,
        "run_config": step_context.run_config,
    }

    dm_solid_handle_kwargs = step_context.solid_handle._asdict()
    dm_step_key = step_context.step.key

    parameters = {}

    parameters["__dm_context"] = dm_context_dict
    parameters["__dm_executable_dict"] = dm_executable_dict
    parameters["__dm_pipeline_run_dict"] = pack_value(step_context.pipeline_run)
    parameters["__dm_solid_handle_kwargs"] = dm_solid_handle_kwargs
    parameters["__dm_instance_ref_dict"] = pack_value(step_context.instance.get_ref())
    parameters["__dm_step_key"] = dm_step_key
    parameters["__dm_input_names"] = list(inputs.keys())

    return parameters
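For context, the dict built by the functions above ultimately becomes papermill's `parameters` argument. A minimal hand-off sketch with hypothetical notebook paths (dagstermill's real invocation also manages the output log and its own execution engine):

import papermill

# papermill injects these values into the notebook cell tagged "parameters"
# before executing it top to bottom.
parameters = get_papermill_parameters(step_context, inputs, output_log_path, "solid")
papermill.execute_notebook(
    input_path="/path/to/source_notebook.ipynb",     # hypothetical path
    output_path="/path/to/executed_notebook.ipynb",  # hypothetical path
    parameters=parameters,
)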
def _submit_task_k8s_job(app, pipeline_context, step, queue, priority, known_state):
    user_defined_k8s_config = get_user_defined_k8s_config(step.tags)

    execute_step_args = ExecuteStepArgs(
        pipeline_origin=pipeline_context.reconstructable_pipeline.get_python_origin(),
        pipeline_run_id=pipeline_context.pipeline_run.run_id,
        step_keys_to_execute=[step.key],
        instance_ref=pipeline_context.instance.get_ref(),
        retry_mode=pipeline_context.executor.retries.for_inner_plan(),
        known_state=known_state,
        should_verify_step=True,
    )

    task = create_k8s_job_task(app)
    task_signature = task.si(
        execute_step_args_packed=pack_value(execute_step_args),
        job_config_dict=pipeline_context.executor.job_config.to_dict(),
        job_namespace=pipeline_context.executor.job_namespace,
        user_defined_k8s_config_dict=user_defined_k8s_config.to_dict(),
        load_incluster_config=pipeline_context.executor.load_incluster_config,
        kubeconfig_file=pipeline_context.executor.kubeconfig_file,
    )

    return task_signature.apply_async(
        priority=priority,
        queue=queue,
        routing_key="{queue}.execute_step_k8s_job".format(queue=queue),
    )
def _submit_task_k8s_job(app, pipeline_context, step, queue, priority):
    user_defined_k8s_config = get_user_defined_k8s_config(step.tags)

    task = create_k8s_job_task(app)
    recon_repo = pipeline_context.pipeline.get_reconstructable_repository()

    task_signature = task.si(
        instance_ref_dict=pipeline_context.instance.get_ref().to_dict(),
        step_keys=[step.key],
        run_config=pipeline_context.pipeline_run.run_config,
        mode=pipeline_context.pipeline_run.mode,
        repo_name=recon_repo.get_definition().name,
        repo_location_name=pipeline_context.executor.repo_location_name,
        run_id=pipeline_context.pipeline_run.run_id,
        job_config_dict=pipeline_context.executor.job_config.to_dict(),
        job_namespace=pipeline_context.executor.job_namespace,
        user_defined_k8s_config_dict=user_defined_k8s_config.to_dict(),
        retries_dict=pipeline_context.executor.retries.for_inner_plan().to_config(),
        pipeline_origin_packed=pack_value(pipeline_context.pipeline.get_origin()),
        load_incluster_config=pipeline_context.executor.load_incluster_config,
        kubeconfig_file=pipeline_context.executor.kubeconfig_file,
    )

    return task_signature.apply_async(
        priority=priority,
        queue=queue,
        routing_key="{queue}.execute_step_k8s_job".format(queue=queue),
    )
@contextmanager  # this function yields, so it must be consumed as a context manager
def in_pipeline_manager(
    pipeline_name='hello_world_pipeline',
    solid_handle=SolidHandle('hello_world', 'hello_world', None),
    handle_kwargs=None,
    mode=None,
    **kwargs
):
    manager = Manager()

    run_id = make_new_run_id()
    instance = DagsterInstance.local_temp()
    marshal_dir = tempfile.mkdtemp()

    if not handle_kwargs:
        handle_kwargs = {
            'pipeline_name': pipeline_name,
            'module_name': 'dagstermill.examples.repository',
            'fn_name': 'define_hello_world_pipeline',
        }

    pipeline_run_dict = pack_value(
        PipelineRun(
            pipeline_name=pipeline_name,
            run_id=run_id,
            mode=mode or 'default',
            environment_dict=None,
            selector=None,
            step_keys_to_execute=None,
            status=PipelineRunStatus.NOT_STARTED,
        )
    )

    try:
        with safe_tempfile_path() as output_log_file_path:
            context_dict = {
                'pipeline_run_dict': pipeline_run_dict,
                'solid_handle_kwargs': solid_handle._asdict(),
                'handle_kwargs': handle_kwargs,
                'marshal_dir': marshal_dir,
                'environment_dict': {},
                'output_log_path': output_log_file_path,
                'instance_ref_dict': pack_value(instance.get_ref()),
            }

            manager.reconstitute_pipeline_context(**dict(context_dict, **kwargs))
            yield manager
    finally:
        shutil.rmtree(marshal_dir)
def in_pipeline_manager( pipeline_name="hello_world_pipeline", solid_handle=NodeHandle("hello_world", None), step_key="hello_world", executable_dict=None, mode=None, **kwargs, ): manager = Manager() run_id = make_new_run_id() with instance_for_test() as instance: marshal_dir = tempfile.mkdtemp() if not executable_dict: executable_dict = ReconstructablePipeline.for_module( "dagstermill.examples.repository", "hello_world_pipeline").to_dict() pipeline_run_dict = pack_value( PipelineRun( pipeline_name=pipeline_name, run_id=run_id, mode=mode or "default", run_config=None, step_keys_to_execute=None, status=PipelineRunStatus.NOT_STARTED, )) try: with safe_tempfile_path() as output_log_file_path: context_dict = { "pipeline_run_dict": pipeline_run_dict, "solid_handle_kwargs": solid_handle._asdict(), "executable_dict": executable_dict, "marshal_dir": marshal_dir, "run_config": {}, "output_log_path": output_log_file_path, "instance_ref_dict": pack_value(instance.get_ref()), "step_key": step_key, } manager.reconstitute_pipeline_context( **dict(context_dict, **kwargs)) yield manager finally: shutil.rmtree(marshal_dir)
def _submit_task_docker(app, pipeline_context, step, queue, priority):
    execute_step_args = ExecuteStepArgs(
        pipeline_origin=pipeline_context.pipeline.get_python_origin(),
        pipeline_run_id=pipeline_context.pipeline_run.run_id,
        step_keys_to_execute=[step.key],
        instance_ref=pipeline_context.instance.get_ref(),
        retries_dict=pipeline_context.executor.retries.for_inner_plan().to_config(),
    )

    task = create_docker_task(app)
    task_signature = task.si(
        execute_step_args_packed=pack_value(execute_step_args),
        docker_config=pipeline_context.executor.docker_config,
    )
    return task_signature.apply_async(
        priority=priority,
        queue=queue,
        routing_key="{queue}.execute_step_docker".format(queue=queue),
    )
def _submit_task(app, pipeline_context, step, queue, priority, known_state):
    from .tasks import create_task

    execute_step_args = ExecuteStepArgs(
        pipeline_origin=pipeline_context.pipeline.get_python_origin(),
        pipeline_run_id=pipeline_context.pipeline_run.run_id,
        step_keys_to_execute=[step.key],
        instance_ref=pipeline_context.instance.get_ref(),
        retry_mode=pipeline_context.executor.retries.for_inner_plan(),
        known_state=known_state,
    )

    task = create_task(app)
    task_signature = task.si(
        execute_step_args_packed=pack_value(execute_step_args),
        executable_dict=pipeline_context.pipeline.to_dict(),
    )
    return task_signature.apply_async(
        priority=priority,
        queue=queue,
        routing_key="{queue}.execute_plan".format(queue=queue),
    )
def _submit_task_k8s_job(app, plan_context, step, queue, priority, known_state):
    user_defined_k8s_config = get_user_defined_k8s_config(step.tags)

    pipeline_origin = plan_context.reconstructable_pipeline.get_python_origin()

    execute_step_args = ExecuteStepArgs(
        pipeline_origin=pipeline_origin,
        pipeline_run_id=plan_context.pipeline_run.run_id,
        step_keys_to_execute=[step.key],
        instance_ref=plan_context.instance.get_ref(),
        retry_mode=plan_context.executor.retries.for_inner_plan(),
        known_state=known_state,
        should_verify_step=True,
    )

    job_config = plan_context.executor.job_config
    if not job_config.job_image:
        job_config = job_config.with_image(pipeline_origin.repository_origin.container_image)

    if not job_config.job_image:
        raise Exception("No image included in either executor config or the dagster job")

    task = create_k8s_job_task(app)
    task_signature = task.si(
        execute_step_args_packed=pack_value(execute_step_args),
        job_config_dict=job_config.to_dict(),
        job_namespace=plan_context.executor.job_namespace,
        user_defined_k8s_config_dict=user_defined_k8s_config.to_dict(),
        load_incluster_config=plan_context.executor.load_incluster_config,
        job_wait_timeout=plan_context.executor.job_wait_timeout,
        kubeconfig_file=plan_context.executor.kubeconfig_file,
    )

    return task_signature.apply_async(
        priority=priority,
        queue=queue,
        routing_key="{queue}.execute_step_k8s_job".format(queue=queue),
    )
def _submit_task_docker(app, pipeline_context, step, queue, priority):
    task = create_docker_task(app)

    recon_repo = pipeline_context.pipeline.get_reconstructable_repository()

    task_signature = task.si(
        instance_ref_dict=pipeline_context.instance.get_ref().to_dict(),
        step_keys=[step.key],
        run_config=pipeline_context.pipeline_run.run_config,
        mode=pipeline_context.pipeline_run.mode,
        repo_name=recon_repo.get_definition().name,
        run_id=pipeline_context.pipeline_run.run_id,
        docker_config=pipeline_context.executor.docker_config,
        pipeline_origin_packed=pack_value(pipeline_context.pipeline.get_origin()),
        retries_dict=pipeline_context.executor.retries.for_inner_plan().to_config(),
    )
    return task_signature.apply_async(
        priority=priority,
        queue=queue,
        routing_key="{queue}.execute_step_docker".format(queue=queue),
    )
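All of the `_submit_task*` helpers above follow the same celery canvas pattern: `task.si(...)` builds an immutable signature (arguments frozen, no upstream results prepended in a chain) and `apply_async` enqueues it with an explicit queue, priority, and routing key. A minimal standalone sketch with a hypothetical task and broker URL:

from celery import Celery

app = Celery("example", broker="pyamqp://guest@localhost//")  # hypothetical broker

@app.task
def execute_step(step_key):
    # Stand-in for the real step-execution tasks created by create_task /
    # create_docker_task / create_k8s_job_task.
    print(f"executing {step_key}")

# .si freezes the arguments; apply_async hands the signature to the broker.
sig = execute_step.si("my_step")
result = sig.apply_async(queue="dagster", priority=5, routing_key="dagster.execute_plan")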
def to_dict(self):
    return pack_value(self)
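A hedged round-trip sketch of what this `to_dict` produces, assuming dagster's serdes helpers (the import path has moved between releases) and reusing the ExecuteStepArgs built in the snippets above:

from dagster.serdes import pack_value, unpack_value  # path varies by dagster version

# pack_value converts a serdes-whitelisted namedtuple into plain
# JSON-serializable structures; unpack_value reverses the transformation.
packed = execute_step_args.to_dict()    # equivalent to pack_value(execute_step_args)
restored = unpack_value(packed)         # an equivalent ExecuteStepArgs instance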