def test_using_s3_for_subplan(s3_bucket):
    pipeline = define_inty_pipeline()

    environment_dict = {'storage': {'s3': {'s3_bucket': s3_bucket}}}

    execution_plan = create_execution_plan(pipeline, environment_dict=environment_dict)

    assert execution_plan.get_step_by_key('return_one.transform')

    step_keys = ['return_one.transform']

    run_id = str(uuid.uuid4())

    try:
        return_one_step_events = list(
            execute_plan(
                execution_plan,
                environment_dict=environment_dict,
                run_config=RunConfig(run_id=run_id),
                step_keys_to_execute=step_keys,
            )
        )

        assert get_step_output(return_one_step_events, 'return_one.transform')
        with yield_pipeline_execution_context(
            pipeline, environment_dict, RunConfig(run_id=run_id)
        ) as context:
            assert has_s3_intermediate(context, s3_bucket, run_id, 'return_one.transform')
            assert get_s3_intermediate(context, s3_bucket, run_id, 'return_one.transform', Int) == 1

        add_one_step_events = list(
            execute_plan(
                execution_plan,
                environment_dict=environment_dict,
                run_config=RunConfig(run_id=run_id),
                step_keys_to_execute=['add_one.transform'],
            )
        )

        assert get_step_output(add_one_step_events, 'add_one.transform')
        with yield_pipeline_execution_context(
            pipeline, environment_dict, RunConfig(run_id=run_id)
        ) as context:
            assert has_s3_intermediate(context, s3_bucket, run_id, 'add_one.transform')
            assert get_s3_intermediate(context, s3_bucket, run_id, 'add_one.transform', Int) == 2
    finally:
        with yield_pipeline_execution_context(
            pipeline, environment_dict, RunConfig(run_id=run_id)
        ) as context:
            rm_s3_intermediate(context, s3_bucket, run_id, 'return_one.transform')
            rm_s3_intermediate(context, s3_bucket, run_id, 'add_one.transform')
def test_file_system_object_store_composite_types_with_custom_serializer_for_inner_type():
    run_id = str(uuid.uuid4())

    object_store = FileSystemObjectStore(run_id=run_id)
    assert object_store.root == os.path.join(
        seven.get_system_temp_directory(), 'dagster', 'runs', run_id, 'files'
    )

    with yield_pipeline_execution_context(
        PipelineDefinition([]), {}, RunConfig(run_id=run_id)
    ) as context:
        try:
            object_store.set_object(
                ['foo', 'bar'],
                context,
                resolve_to_runtime_type(List_(LowercaseString)).inst(),
                ['list'],
            )
            assert object_store.has_object(context, ['list'])
            # Reading back with a different composite runtime type (List_(Bool_))
            # still round-trips, which suggests composite types fall back to the
            # default (pickle) strategy rather than applying the inner type's
            # custom serializer.
            assert object_store.get_object(
                context, resolve_to_runtime_type(List_(Bool_)).inst(), ['list']
            ) == ['foo', 'bar']
        finally:
            try:
                shutil.rmtree(object_store.root)
            except seven.FileNotFoundError:
                pass
def test_file_system_object_store_composite_types():
    run_id = str(uuid.uuid4())

    object_store = FileSystemObjectStore(run_id=run_id)
    assert object_store.root == os.path.join(
        seven.get_system_temp_directory(), 'dagster', 'runs', run_id, 'files'
    )

    with yield_pipeline_execution_context(
        PipelineDefinition([]), {}, RunConfig(run_id=run_id)
    ) as context:
        try:
            object_store.set_object(
                [True, False], context, resolve_to_runtime_type(List_(Bool_)).inst(), ['bool']
            )
            assert object_store.has_object(context, ['bool'])
            assert object_store.get_object(
                context, resolve_to_runtime_type(List_(Bool_)).inst(), ['bool']
            ) == [True, False]
        finally:
            try:
                shutil.rmtree(object_store.root)
            except seven.FileNotFoundError:
                pass
def test_file_system_object_store_with_base_dir():
    run_id = str(uuid.uuid4())

    # Create the tempdir before entering the try block so the finally clause
    # cannot hit a NameError if mkdtemp itself fails.
    tempdir = tempfile.mkdtemp()
    try:
        object_store = FileSystemObjectStore(run_id=run_id, base_dir=tempdir)
        assert object_store.root == os.path.join(tempdir, 'dagster', 'runs', run_id, 'files')

        with yield_pipeline_execution_context(
            PipelineDefinition([]), {}, RunConfig(run_id=run_id)
        ) as context:
            try:
                object_store.set_object(True, context, Bool.inst(), ['true'])

                assert object_store.has_object(context, ['true'])
                assert object_store.get_object(context, Bool.inst(), ['true']) is True
            finally:
                try:
                    shutil.rmtree(object_store.root)
                except seven.FileNotFoundError:
                    pass
    finally:
        try:
            shutil.rmtree(tempdir)
        except seven.FileNotFoundError:
            pass
def test_serialize_deserialize():
    with yield_pipeline_execution_context(PipelineDefinition([]), {}, RunConfig()) as context:
        with tempfile.NamedTemporaryFile() as fd:
            serialize_to_file(context, PickleSerializationStrategy(), 'foo', fd.name)

            assert deserialize_from_file(context, PickleSerializationStrategy(), fd.name) == 'foo'
def test_file_system_object_store_with_composite_type_storage_plugin():
    run_id = str(uuid.uuid4())

    object_store = FileSystemObjectStore(
        run_id=run_id, types_to_register={String.inst(): FancyStringFilesystemTypeStoragePlugin}
    )

    with yield_pipeline_execution_context(
        PipelineDefinition([]), {}, RunConfig(run_id=run_id)
    ) as context:
        with pytest.raises(check.NotImplementedCheckError):
            object_store.set_value(
                ['hello'], context, resolve_to_runtime_type(List_(String_)), ['obj_name']
            )

    with yield_pipeline_execution_context(
        PipelineDefinition([]), {}, RunConfig(run_id=run_id)
    ) as context:
        with pytest.raises(check.NotImplementedCheckError):
            object_store.set_value(
                ['hello'], context, resolve_to_runtime_type(Nullable_(String_)), ['obj_name']
            )

    with yield_pipeline_execution_context(
        PipelineDefinition([]), {}, RunConfig(run_id=run_id)
    ) as context:
        with pytest.raises(check.NotImplementedCheckError):
            object_store.set_value(
                ['hello'], context, resolve_to_runtime_type(List_(Nullable_(String_))), ['obj_name']
            )

    with yield_pipeline_execution_context(
        PipelineDefinition([]), {}, RunConfig(run_id=run_id)
    ) as context:
        with pytest.raises(check.NotImplementedCheckError):
            object_store.set_value(
                ['hello'], context, resolve_to_runtime_type(Nullable_(List_(String_))), ['obj_name']
            )
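# Note on the four cases above: List_, Nullable_, and their two nestings cover
# the composite type constructors. Type storage plugins are registered against
# scalar runtime types (here String), so set_value on a composite wrapping a
# plugin-registered type raises NotImplementedCheckError rather than silently
# bypassing the plugin. This reading is inferred from the assertions; the
# plugin-resolution details are an assumption, not confirmed by this file.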
def define_out_of_pipeline_context(self, context_config):
    pipeline_def = PipelineDefinition([], name='Ephemeral Notebook Pipeline')

    # BUG: If the context cleans up after itself (e.g. closes a db connection or similar),
    # this will instigate that process *before* return. We are going to have to
    # manage this manually (without a with block) in order to make this work.
    # See https://github.com/dagster-io/dagster/issues/796
    with yield_pipeline_execution_context(
        pipeline_def,
        {} if context_config is None else {'context': context_config},
        ExecutionMetadata(run_id=''),
    ) as pipeline_context:
        self.context = DagstermillInNotebookExecutionContext(pipeline_context)

    return self.context
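# A minimal sketch of the manual management the BUG comment above alludes to.
# This helper is hypothetical (not part of dagster): it enters the context
# manager returned by yield_pipeline_execution_context without a with block,
# leaving teardown to the caller so the context can outlive the return.
def _open_pipeline_context(pipeline_def, environment_dict, execution_metadata):
    manager = yield_pipeline_execution_context(pipeline_def, environment_dict, execution_metadata)
    pipeline_context = manager.__enter__()
    # The caller is responsible for eventually invoking
    # manager.__exit__(None, None, None) to run the cleanup that the with
    # block would otherwise trigger on exit.
    return manager, pipeline_context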
def test_s3_object_store_with_composite_type_storage_plugin():
    run_id = str(uuid.uuid4())

    # FIXME need a dedicated test bucket
    object_store = S3ObjectStore(
        run_id=run_id,
        s3_bucket='dagster-airflow-scratch',
        types_to_register={String.inst(): FancyStringS3TypeStoragePlugin},
    )

    with yield_pipeline_execution_context(
        PipelineDefinition([]), {}, RunConfig(run_id=run_id)
    ) as context:
        with pytest.raises(check.NotImplementedCheckError):
            object_store.set_value(
                ['hello'], context, resolve_to_runtime_type(List_(String_)), ['obj_name']
            )
def test_s3_object_store():
    run_id = str(uuid.uuid4())

    # FIXME need a dedicated test bucket
    object_store = S3ObjectStore(run_id=run_id, s3_bucket='dagster-airflow-scratch')
    assert object_store.root == '/'.join(['dagster', 'runs', run_id, 'files'])

    with yield_pipeline_execution_context(
        PipelineDefinition([]), {}, RunConfig(run_id=run_id)
    ) as context:
        try:
            object_store.set_object(True, context, Bool.inst(), ['true'])

            assert object_store.has_object(context, ['true'])
            assert object_store.get_object(context, Bool.inst(), ['true']) is True
            assert object_store.url_for_paths(['true']).startswith('s3://')
        finally:
            object_store.rm_object(context, ['true'])
def execute(self):
    from dagster.core.execution import yield_pipeline_execution_context

    check.inst(self.run_config.executor_config, MultiprocessExecutorConfig)
    pipeline = self.run_config.executor_config.pipeline_fn()

    with yield_pipeline_execution_context(
        pipeline, self.environment_dict, self.run_config.with_tags(pid=str(os.getpid()))
    ) as pipeline_context:
        execution_plan = create_execution_plan_core(
            pipeline_context.pipeline_def, pipeline_context.environment_config
        )

        for step_event in start_inprocess_executor(
            pipeline_context,
            execution_plan,
            pipeline_context.intermediates_manager,
            step_keys_to_execute=[self.step_key],
        ):
            yield step_event
def test_file_system_object_store_with_custom_serializer():
    run_id = str(uuid.uuid4())

    object_store = FileSystemObjectStore(run_id=run_id)

    with yield_pipeline_execution_context(
        PipelineDefinition([]), {}, RunConfig(run_id=run_id)
    ) as context:
        try:
            object_store.set_object('foo', context, LowercaseString.inst(), ['foo'])

            # The custom serializer upper-cases on write, so the raw bytes on
            # disk differ from the in-memory value.
            with open(os.path.join(object_store.root, 'foo'), 'rb') as fd:
                assert fd.read().decode('utf-8') == 'FOO'

            assert object_store.has_object(context, ['foo'])
            # Deserialization lower-cases, restoring the original value.
            assert object_store.get_object(context, LowercaseString.inst(), ['foo']) == 'foo'
        finally:
            try:
                shutil.rmtree(object_store.root)
            except seven.FileNotFoundError:
                pass
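# For reference, a serialization strategy consistent with the assertions above
# (uppercase bytes at rest, lowercase value in memory) might look like the
# sketch below. This is an assumption about how LowercaseString behaves, not
# its actual implementation; the base class and method names are illustrative.
class UppercaseOnDiskSerializationStrategy(SerializationStrategy):
    def serialize_value(self, _context, value, write_file_obj):
        # 'foo' is written to storage as b'FOO'
        write_file_obj.write(value.upper().encode('utf-8'))

    def deserialize_value(self, _context, read_file_obj):
        # b'FOO' is read back from storage as 'foo'
        return read_file_obj.read().decode('utf-8').lower()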
def test_s3_object_store_with_type_storage_plugin():
    run_id = str(uuid.uuid4())

    # FIXME need a dedicated test bucket
    object_store = S3ObjectStore(
        run_id=run_id,
        s3_bucket='dagster-airflow-scratch',
        types_to_register={String.inst(): FancyStringS3TypeStoragePlugin},
    )

    with yield_pipeline_execution_context(
        PipelineDefinition([]), {}, RunConfig(run_id=run_id)
    ) as context:
        try:
            object_store.set_value('hello', context, String.inst(), ['obj_name'])

            assert object_store.has_object(context, ['obj_name'])
            assert object_store.get_value(context, String.inst(), ['obj_name']) == 'hello'
        finally:
            object_store.rm_object(context, ['obj_name'])
def test_s3_object_store_composite_types_with_custom_serializer_for_inner_type():
    run_id = str(uuid.uuid4())

    object_store = S3ObjectStore(run_id=run_id, s3_bucket='dagster-airflow-scratch')

    with yield_pipeline_execution_context(
        PipelineDefinition([]), {}, RunConfig(run_id=run_id)
    ) as context:
        try:
            object_store.set_object(
                ['foo', 'bar'],
                context,
                resolve_to_runtime_type(List_(LowercaseString)).inst(),
                ['list'],
            )
            assert object_store.has_object(context, ['list'])
            assert object_store.get_object(
                context, resolve_to_runtime_type(List_(Bool_)).inst(), ['list']
            ) == ['foo', 'bar']
        finally:
            # Clean up the key that was actually written ('list', not 'foo').
            object_store.rm_object(context, ['list'])
def test_s3_object_store_with_custom_serializer():
    run_id = str(uuid.uuid4())

    # FIXME need a dedicated test bucket
    object_store = S3ObjectStore(run_id=run_id, s3_bucket='dagster-airflow-scratch')

    with yield_pipeline_execution_context(
        PipelineDefinition([]), {}, RunConfig(run_id=run_id)
    ) as context:
        try:
            object_store.set_object('foo', context, LowercaseString.inst(), ['foo'])

            assert (
                object_store.s3.get_object(
                    Bucket=object_store.bucket, Key='/'.join([object_store.root] + ['foo'])
                )['Body']
                .read()
                .decode('utf-8')
                == 'FOO'
            )

            assert object_store.has_object(context, ['foo'])
            assert object_store.get_object(context, LowercaseString.inst(), ['foo']) == 'foo'
        finally:
            object_store.rm_object(context, ['foo'])
def populate_context(
    self,
    run_id,
    solid_def_name,
    pipeline_def_name,
    marshal_dir,
    environment_dict,
    output_log_path,
):
    check.dict_param(environment_dict, 'environment_dict')
    self.populated_by_papermill = True
    check.invariant(
        self.repository_def is not None,
        desc='When running Dagstermill notebook in pipeline, '
        'must register a repository within notebook by calling '
        '"dm.register_repository(repository_def)"',
    )

    self.pipeline_def = self.repository_def.get_pipeline(pipeline_def_name)
    check.invariant(self.pipeline_def.has_solid_def(solid_def_name))
    self.solid_def = self.pipeline_def.solid_def_named(solid_def_name)

    self.marshal_dir = marshal_dir

    loggers = None
    if output_log_path != 0:
        event_logger = construct_json_event_logger(output_log_path)
        loggers = [event_logger]
    # do not include event_callback in ExecutionMetadata,
    # since that'll be taken care of by side-channel established by event_logger
    execution_metadata = ExecutionMetadata(run_id, loggers=loggers)

    # See block comment above referencing this issue
    # See https://github.com/dagster-io/dagster/issues/796
    with yield_pipeline_execution_context(
        self.pipeline_def, environment_dict, execution_metadata
    ) as pipeline_context:
        self.context = DagstermillInNotebookExecutionContext(pipeline_context)

    return self.context
def test_file_system_object_store():
    run_id = str(uuid.uuid4())

    object_store = FileSystemObjectStore(run_id=run_id)
    assert object_store.root == os.path.join(
        seven.get_system_temp_directory(), 'dagster', 'runs', run_id, 'files'
    )

    with yield_pipeline_execution_context(
        PipelineDefinition([]), {}, RunConfig(run_id=run_id)
    ) as context:
        try:
            object_store.set_object(True, context, Bool.inst(), ['true'])

            assert object_store.has_object(context, ['true'])
            assert object_store.get_object(context, Bool.inst(), ['true']) is True
            assert object_store.url_for_paths(['true']).startswith('file:///')

            # rm_object returns None and is a no-op when the key is already
            # gone or never existed.
            assert object_store.rm_object(context, ['true']) is None
            assert object_store.rm_object(context, ['true']) is None
            assert object_store.rm_object(context, ['dslkfhjsdflkjfs']) is None
        finally:
            try:
                shutil.rmtree(object_store.root)
            except seven.FileNotFoundError:
                pass
def populate_context(
    self,
    run_id,
    solid_def_name,
    pipeline_def_name,
    marshal_dir,
    environment_dict,
    output_log_path,
    input_name_type_dict,
    output_name_type_dict,
):
    check.dict_param(environment_dict, 'environment_dict')
    self.populated_by_papermill = True
    self.solid_def_name = solid_def_name
    self.marshal_dir = marshal_dir

    if self.repository_def is None:
        self.pipeline_def = PipelineDefinition([], name='Dummy Pipeline (No Repo Registration)')
        self.input_name_type_dict = dict_to_enum(input_name_type_dict)
        self.output_name_type_dict = dict_to_enum(output_name_type_dict)
        for _, runtime_type_enum in self.input_name_type_dict.items():
            if runtime_type_enum == SerializableRuntimeType.NONE:
                raise DagstermillError(
                    'If Dagstermill solids have inputs that require serialization strategies '
                    'that are not pickling, then you must register a repository within '
                    'notebook by calling dm.register_repository(repository_def).'
                )
        for _, runtime_type_enum in self.output_name_type_dict.items():
            if runtime_type_enum == SerializableRuntimeType.NONE:
                raise DagstermillError(
                    'If Dagstermill solids have outputs that require serialization strategies '
                    'that are not pickling, then you must register a repository within '
                    'notebook by calling dm.register_repository(repository_def).'
                )
        with yield_pipeline_execution_context(
            self.pipeline_def, {}, RunConfig(run_id=run_id)
        ) as pipeline_context:
            self.context = DagstermillInNotebookExecutionContext(pipeline_context)
    else:
        self.pipeline_def = self.repository_def.get_pipeline(pipeline_def_name)
        check.invariant(self.pipeline_def.has_solid_def(solid_def_name))
        self.solid_def = self.pipeline_def.solid_def_named(solid_def_name)

        loggers = None
        if output_log_path != 0:  # there is no output log
            event_logger = construct_json_event_logger(output_log_path)
            loggers = [event_logger]
        # do not include event_callback in RunConfig,
        # since that'll be taken care of by side-channel established by event_logger
        run_config = RunConfig(run_id, loggers=loggers)

        # See block comment above referencing this issue
        # See https://github.com/dagster-io/dagster/issues/796
        with yield_pipeline_execution_context(
            self.pipeline_def, environment_dict, run_config
        ) as pipeline_context:
            self.context = DagstermillInNotebookExecutionContext(pipeline_context)

    return self.context
def run_test_pipeline(pipeline):
    execution_metadata = ExecutionMetadata(run_id=str(uuid.uuid4()))
    with yield_pipeline_execution_context(
        pipeline, TEST_ENVIRONMENT, execution_metadata
    ) as context:
        execution_plan = create_execution_plan_core(context)
        return execute_plan(context, execution_plan)