def test_snapshot_command_pipeline_solid_subset():
    """Snapshot pipeline 'foo' restricted to the 'do_input' solid and inspect the result."""
    with get_temp_file_name() as output_file:
        cli_args = [
            output_file,
            '-y',
            file_relative_path(__file__, 'repository_file.yaml'),
            'foo',
            '--solid-subset',
            'do_input',
        ]
        result = CliRunner().invoke(pipeline_snapshot_command, cli_args)

        assert result.exit_code == 0

        # The snapshot was written; confirm it deserializes into exactly one message.
        messages = list(ipc_read_event_stream(output_file))
        assert len(messages) == 1

        external_pipeline_data = messages[0]
        assert isinstance(external_pipeline_data, ExternalPipelineData)
        assert external_pipeline_data.name == 'foo'

        # Only the selected solid's definition should be present in the snapshot.
        definitions_snapshot = (
            external_pipeline_data.pipeline_snapshot.solid_definitions_snapshot
        )
        assert len(definitions_snapshot.solid_def_snaps) == 1
def _execute_pipeline_command(
    repository_file, pipeline_name, environment_dict, mode=None, solid_subset=None
):
    """Run ``dagster api execute_pipeline`` through a shell and yield its IPC messages.

    Args:
        repository_file: Value passed to the CLI's ``-y`` option.
        pipeline_name: Name of the pipeline to execute.
        environment_dict: Config object; JSON-encoded into ``--environment-dict``.
        mode: Optional mode name. When ``None`` the ``--mode`` flag is left off
            the command line instead of being rendered as the text ``None``.
        solid_subset: Optional iterable of solid names, comma-joined into
            ``--solid_subset``.

    Yields:
        Every message produced by ``ipc_read_event_stream`` for the output file.
    """
    with get_temp_dir(in_directory=get_system_temp_directory()) as tmp_dir:
        output_file_name = "{}.json".format(uuid4())
        output_file = os.path.join(tmp_dir, output_file_name)

        # NOTE(review): the values below are interpolated into a shell string
        # without escaping; a single quote inside the JSON would break the
        # command. Callers are assumed to pass trusted, quote-free input.
        command = (
            "dagster api execute_pipeline -y {repository_file} {pipeline_name} "
            "{output_file} --environment-dict='{environment_dict}'".format(
                repository_file=repository_file,
                pipeline_name=pipeline_name,
                output_file=output_file,
                environment_dict=json.dumps(environment_dict),
            )
        )

        # Formatting a None mode would hand the CLI the literal string "None".
        if mode is not None:
            command += " --mode={mode}".format(mode=mode)

        if solid_subset:
            command += " --solid_subset={solid_subset}".format(solid_subset=",".join(solid_subset))

        os.popen(command)

        for message in ipc_read_event_stream(output_file):
            yield message
def test_snapshot_command_pipeline_solid_selection():
    """Take a subset snapshot of 'foo' selecting only 'do_input' and inspect the result."""
    with get_temp_file_name() as output_file:
        # The selection is handed to the CLI as a JSON-encoded list.
        selection_option = '--solid-selection={solid_selection}'.format(
            solid_selection=json.dumps(['do_input'])
        )
        cli_args = [
            output_file,
            '-y',
            file_relative_path(__file__, 'repository_file.yaml'),
            'foo',
            selection_option,
        ]
        result = CliRunner().invoke(pipeline_subset_snapshot_command, cli_args)

        assert result.exit_code == 0

        # The snapshot was written; confirm it deserializes into exactly one message.
        messages = list(ipc_read_event_stream(output_file))
        assert len(messages) == 1

        subset_result = messages[0]
        assert isinstance(subset_result, ExternalPipelineSubsetResult)

        pipeline_data = subset_result.external_pipeline_data
        assert pipeline_data.name == 'foo'

        definitions_snapshot = pipeline_data.pipeline_snapshot.solid_definitions_snapshot
        assert len(definitions_snapshot.solid_def_snaps) == 1
def cli_api_execute_run(instance, pipeline_origin, pipeline_run):
    """Execute a run via the CLI API subprocess and return the events it emitted.

    Temporary input and output files are created for the duration of the call.
    The first message read from the output file must be an
    ``ExecuteRunArgsLoadComplete``; it is validated and dropped, and the
    remaining messages are returned as a list.
    """
    with safe_tempfile_path() as output_file, safe_tempfile_path() as input_file:
        # The handle is not used further here; it stays bound until the function returns.
        _subprocess_handle = _cli_api_execute_run_process(
            input_file, output_file, instance, pipeline_origin, pipeline_run
        )

        all_events = list(ipc_read_event_stream(output_file))
        load_complete, run_events = all_events[0], all_events[1:]
        check.inst(load_complete, ExecuteRunArgsLoadComplete)
        return run_events
def test_write_empty_stream():
    """A write stream that is opened and closed without sends reads back empty."""
    with safe_tempfile_path() as stream_path:
        with ipc_write_stream(stream_path) as _:
            pass  # nothing is sent on purpose

        received = list(ipc_read_event_stream(stream_path))
        assert len(received) == 0
def test_write_error_stream():
    """An exception raised inside the write stream is read back as an IPCErrorMessage."""
    with safe_tempfile_path() as stream_path:
        # This test relies on the write stream recording the exception instead
        # of letting it propagate out of the ``with`` block.
        with ipc_write_stream(stream_path) as _:
            raise Exception("uh oh")

        received = list(ipc_read_event_stream(stream_path))
        assert len(received) == 1

        error_message = received[0]
        assert isinstance(error_message, IPCErrorMessage)
        assert "uh oh" in error_message.serializable_error_info.message
def test_execute_run_api(repo_handle):
    """Execute pipeline 'foo' through cli_api_execute_run and check the emitted events."""
    expected_event_types = [
        'PIPELINE_START',
        'ENGINE_EVENT',
        'STEP_START',
        'STEP_OUTPUT',
        'STEP_SUCCESS',
        'STEP_START',
        'STEP_INPUT',
        'STEP_OUTPUT',
        'STEP_SUCCESS',
        'ENGINE_EVENT',
        'PIPELINE_SUCCESS',
    ]

    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance.local_temp(temp_dir)
        pipeline_run = instance.create_run(
            pipeline_name='foo',
            run_id=None,
            run_config={},
            mode='default',
            solids_to_execute=None,
            step_keys_to_execute=None,
            status=None,
            tags=None,
            root_run_id=None,
            parent_run_id=None,
            pipeline_snapshot=None,
            execution_plan_snapshot=None,
            parent_pipeline_snapshot=None,
        )

        with safe_tempfile_path() as output_file_path:
            process = cli_api_execute_run(
                output_file=output_file_path,
                instance=instance,
                pipeline_origin=repo_handle.get_origin(),
                pipeline_run=pipeline_run,
            )

            # Let the subprocess run to completion before collecting its output.
            _stdout, _stderr = process.communicate()

            events = list(ipc_read_event_stream(output_file_path))
            assert len(events) == 12

            # ExecuteRunArgsLoadComplete is synthetic, so it is filtered out here.
            observed_event_types = [
                event.event_type_value
                for event in events
                if hasattr(event, 'event_type_value')
            ]
            assert observed_event_types == expected_event_types
def test_write_error_stream():
    """An exception raised inside the write stream is read back as an IPCErrorMessage."""
    with tempfile.NamedTemporaryFile() as temp_file:
        stream_path = temp_file.name

        # This test relies on the write stream recording the exception instead
        # of letting it propagate out of the ``with`` block.
        with ipc_write_stream(stream_path) as _:
            raise Exception('uh oh')

        received = list(ipc_read_event_stream(stream_path))
        assert len(received) == 1

        error_message = received[0]
        assert isinstance(error_message, IPCErrorMessage)
        assert 'uh oh' in error_message.serializable_error_info.message
def cli_api_launch_run(output_file, instance, pipeline_origin, pipeline_run):
    """Start the CLI API run subprocess and return it once its input has been loaded.

    Args:
        output_file (str): Path the subprocess writes its event stream to.
        instance (DagsterInstance): Instance handed to the subprocess launcher.
        pipeline_origin (PipelinePythonOrigin): Origin of the pipeline to run.
        pipeline_run (PipelineRun): The run to execute.

    Returns:
        The process object returned by ``_cli_api_execute_run_process``.
    """
    check.str_param(output_file, 'output_file')
    check.inst_param(instance, 'instance', DagsterInstance)
    check.inst_param(pipeline_origin, 'pipeline_origin', PipelinePythonOrigin)
    check.inst_param(pipeline_run, 'pipeline_run', PipelineRun)

    with safe_tempfile_path() as input_file:
        launched = _cli_api_execute_run_process(
            input_file, output_file, instance, pipeline_origin, pipeline_run
        )

        # Consume the first event before leaving this block so that the called
        # process has loaded its input by the time we return.
        first_event = next(ipc_read_event_stream(output_file))
        check.inst(first_event, ExecuteRunArgsLoadComplete)

        return launched
def cli_api_execute_run(output_file, instance, pipeline_origin, pipeline_run):
    """Spawn ``dagster api execute_run`` in a subprocess and return the process.

    Args:
        output_file (str): Path the subprocess writes its event stream to.
        instance (DagsterInstance): Supplies the instance ref and receives the
            "about to start" engine event.
        pipeline_origin (PipelinePythonOrigin): Supplies the executable path and
            is forwarded in the run arguments.
        pipeline_run (PipelineRun): The run to execute.

    Returns:
        The process object returned by ``open_ipc_subprocess``.
    """
    check.str_param(output_file, 'output_file')
    check.inst_param(instance, 'instance', DagsterInstance)
    check.inst_param(pipeline_origin, 'pipeline_origin', PipelinePythonOrigin)
    check.inst_param(pipeline_run, 'pipeline_run', PipelineRun)

    from dagster.cli.api import ExecuteRunArgsLoadComplete

    with safe_tempfile_path() as input_file:
        run_args = ExecuteRunArgs(
            pipeline_origin=pipeline_origin,
            pipeline_run_id=pipeline_run.run_id,
            instance_ref=instance.get_ref(),
        )
        write_unary_input(input_file, run_args)

        command_parts = [pipeline_origin.executable_path, '-m', 'dagster', 'api', 'execute_run']
        command_parts += [input_file, output_file]

        start_message = (
            'About to start process for pipeline "{pipeline_name}" (run_id: {run_id}).'.format(
                pipeline_name=pipeline_run.pipeline_name, run_id=pipeline_run.run_id
            )
        )
        instance.report_engine_event(
            start_message,
            pipeline_run,
            engine_event_data=EngineEventData(marker_start='cli_api_subprocess_init'),
        )

        spawned = open_ipc_subprocess(command_parts)

        # Consume the first event before leaving this block so that the called
        # process has loaded its input by the time we return.
        first_event = next(ipc_read_event_stream(output_file))
        check.inst(first_event, ExecuteRunArgsLoadComplete)

        return spawned
def test_snapshot_command_repository():
    """Snapshot the repository from repository_file.yaml and inspect the result."""
    with get_temp_file_name() as output_file:
        cli_args = [output_file, '-y', file_relative_path(__file__, 'repository_file.yaml')]
        result = CliRunner().invoke(repository_snapshot_command, cli_args)

        assert result.exit_code == 0

        # The snapshot was written; confirm it deserializes into exactly one message.
        messages = list(ipc_read_event_stream(output_file))
        assert len(messages) == 1

        repository_data = messages[0]
        assert isinstance(repository_data, ExternalRepositoryData)
        assert repository_data.name == 'bar'
        assert len(repository_data.external_pipeline_datas) == 2
def test_write_error_with_custom_message():
    """send_error attaches a caller-supplied message alongside the exception info."""
    with safe_tempfile_path() as stream_path:
        with ipc_write_stream(stream_path) as stream:
            # The exception is raised only to obtain a live exc_info triple.
            try:
                raise Exception("uh oh")
            except:
                stream.send_error(sys.exc_info(), message="custom")

        received = list(ipc_read_event_stream(stream_path))
        assert len(received) == 1

        ipc_message = received[0]
        assert isinstance(ipc_message, IPCErrorMessage)
        assert "uh oh" in ipc_message.serializable_error_info.message
        assert ipc_message.message == "custom"
def test_write_error_with_custom_message():
    """send_error attaches a caller-supplied message alongside the exception info."""
    with tempfile.NamedTemporaryFile() as temp_file:
        stream_path = temp_file.name

        with ipc_write_stream(stream_path) as stream:
            # The exception is raised only to obtain a live exc_info triple.
            try:
                raise Exception('uh oh')
            except:  # pylint: disable=bare-except
                stream.send_error(sys.exc_info(), message='custom')

        received = list(ipc_read_event_stream(stream_path))
        assert len(received) == 1

        ipc_message = received[0]
        assert isinstance(ipc_message, IPCErrorMessage)
        assert 'uh oh' in ipc_message.serializable_error_info.message
        assert ipc_message.message == 'custom'
def run_detached_container_command(image, command, volumes, output_file):
    """Run ``command`` in a docker container and yield the messages it writes.

    Args:
        image: Image passed to ``client.containers.run``.
        command: Command passed to ``client.containers.run``.
        volumes: Volume spec passed to ``client.containers.run``.
        output_file: Path read with ``ipc_read_event_stream`` after the run.

    Yields:
        Every message produced by ``ipc_read_event_stream(output_file)``.

    Raises:
        ImportError: If the ``docker`` package cannot be imported. A warning
            explaining how to install it is emitted first.
    """
    # Guard only the import: an ImportError raised later (by the container run
    # or the stream reader) must not be reported as "docker is not installed".
    try:
        from docker.client import from_env
    except ImportError:
        warnings.warn(
            "Cannot load docker environment without the python package docker. "
            "Ensure that dagster[docker] or the python package docker is installed."
        )
        raise

    client = from_env()

    # This is currently blocking. Has to be updated to run in detached mode. The problem
    # is knowing when the file is ready to be read from.
    client.containers.run(image, command=command, volumes=volumes, auto_remove=False)

    for message in ipc_read_event_stream(output_file):
        yield message
def test_write_read_stream():
    """Messages sent on a write stream are read back equal and in order."""

    @whitelist_for_serdes
    class TestMessage(namedtuple("_TestMessage", "message")):
        def __new__(cls, message):
            return super(TestMessage, cls).__new__(cls, message)

    with safe_tempfile_path() as stream_path:
        first = TestMessage(message="hello")
        second = TestMessage(message="world")

        with ipc_write_stream(stream_path) as stream:
            for outgoing in (first, second):
                stream.send(outgoing)

        received = list(ipc_read_event_stream(stream_path))

        assert received[0] == first
        assert received[1] == second
def sync_get_external_repository(location_handle):
    """Build an ExternalRepository by invoking ``dagster api snapshot repository``.

    Args:
        location_handle (LocationHandle): Its pointer supplies the extra CLI
            arguments, and it is attached to the resulting RepositoryHandle.

    Returns:
        ExternalRepository: Wraps the single ExternalRepositoryData message
        read back from the command's output file.
    """
    check.inst_param(location_handle, 'location_handle', LocationHandle)

    with get_temp_file_name() as output_file:
        pointer_args = xplat_shlex_split(location_handle.pointer.get_cli_args())
        cli_invocation = ['dagster', 'api', 'snapshot', 'repository', output_file] + pointer_args

        exit_status = subprocess.check_call(cli_invocation)
        check.invariant(
            exit_status == 0, 'dagster api cli invocation did not complete successfully'
        )

        # Exactly one message is expected in the output stream.
        snapshot_messages = list(ipc_read_event_stream(output_file))
        check.invariant(len(snapshot_messages) == 1)

        external_repository_data = snapshot_messages[0]
        check.inst(external_repository_data, ExternalRepositoryData)

        repository_handle = RepositoryHandle(external_repository_data.name, location_handle)
        return ExternalRepository(external_repository_data, repository_handle)
def api_execute_pipeline(instance, recon_repo, pipeline_name, environment_dict, mode, solid_subset):
    """Run ``dagster api execute_pipeline`` through a shell and yield its IPC messages.

    Args:
        instance (DagsterInstance): Its ref is serialized into ``--instance-ref``.
        recon_repo (ReconstructableRepository): Must have a ``yaml_path``, which
            is passed to the CLI's ``-y`` option.
        pipeline_name (str): Name of the pipeline to execute.
        environment_dict (dict): JSON-encoded into ``--environment-dict``.
        mode (str): Passed as ``--mode``.
        solid_subset (Optional[List[str]]): Comma-joined into ``--solid_subset``
            when non-empty.

    Yields:
        Every message produced by ``ipc_read_event_stream`` for the output file.
    """
    check.inst_param(instance, 'instance', DagsterInstance)
    check.inst_param(recon_repo, 'recon_repo', ReconstructableRepository)
    check.str_param(pipeline_name, 'pipeline_name')
    check.dict_param(environment_dict, 'environment_dict')
    check.str_param(mode, 'mode')
    check.opt_list_param(solid_subset, 'solid_subset', of_type=str)
    check.param_invariant(
        recon_repo.yaml_path, 'recon_repo', 'Only support yaml-based repositories for now'
    )

    command_template = (
        "dagster api execute_pipeline -y {repository_file} {pipeline_name} "
        "{output_file} --environment-dict='{environment_dict}' --mode={mode} "
        "--instance-ref='{instance_ref}'"
    )

    with get_temp_dir(in_directory=get_system_temp_directory()) as tmp_dir:
        # A uuid-based file name inside the temp dir receives the event stream.
        output_file = os.path.join(tmp_dir, "{}.json".format(uuid.uuid4()))

        command = command_template.format(
            repository_file=recon_repo.yaml_path,
            pipeline_name=pipeline_name,
            output_file=output_file,
            environment_dict=json.dumps(environment_dict),
            mode=mode,
            instance_ref=serialize_dagster_namedtuple(instance.get_ref()),
        )

        if solid_subset:
            subset_option = " --solid_subset={solid_subset}".format(
                solid_subset=",".join(solid_subset)
            )
            command = command + subset_option

        os.popen(command)

        for message in ipc_read_event_stream(output_file):
            yield message
def sync_cli_api_execute_run(instance, pipeline_origin, pipeline_name,
                             environment_dict, mode, solids_to_execute):
    """Create a run, execute it via cli_api_execute_run, and yield the emitted messages.

    The subprocess is waited on with ``communicate()`` before the output file
    is read, so messages are yielded only after the process has exited.
    """
    with safe_tempfile_path() as output_file_path:
        created_run = instance.create_run(
            pipeline_name=pipeline_name,
            run_id=None,
            environment_dict=environment_dict,
            mode=mode,
            solids_to_execute=solids_to_execute,
            step_keys_to_execute=None,
            status=None,
            tags=None,
            root_run_id=None,
            parent_run_id=None,
            pipeline_snapshot=None,
            execution_plan_snapshot=None,
            parent_pipeline_snapshot=None,
        )

        run_process = cli_api_execute_run(
            output_file_path, instance, pipeline_origin, created_run
        )

        # Captured output is discarded; the call is made to block until exit.
        _stdout, _stderr = run_process.communicate()

        for message in ipc_read_event_stream(output_file_path):
            yield message