def test_using_file_system_for_subplan_multiprocessing():
    pipeline = define_inty_pipeline()

    environment_dict = {'storage': {'filesystem': {}}}

    execution_plan = create_execution_plan(pipeline, environment_dict=environment_dict)

    assert execution_plan.get_step_by_key('return_one.compute')

    step_keys = ['return_one.compute']

    run_id = str(uuid.uuid4())

    return_one_step_events = list(
        execute_plan(
            execution_plan,
            environment_dict=environment_dict,
            run_config=RunConfig(
                run_id=run_id,
                executor_config=MultiprocessExecutorConfig(
                    handle=ExecutionTargetHandle.for_pipeline_fn(define_inty_pipeline)
                ),
            ),
            step_keys_to_execute=step_keys,
        )
    )

    store = FileSystemIntermediateStore(run_id)

    assert get_step_output(return_one_step_events, 'return_one.compute')
    assert store.has_intermediate(None, 'return_one.compute')
    assert store.get_intermediate(None, 'return_one.compute', Int) == 1

    add_one_step_events = list(
        execute_plan(
            execution_plan,
            environment_dict=environment_dict,
            run_config=RunConfig(
                run_id=run_id,
                executor_config=MultiprocessExecutorConfig(
                    handle=ExecutionTargetHandle.for_pipeline_fn(define_inty_pipeline)
                ),
            ),
            step_keys_to_execute=['add_one.compute'],
        )
    )

    assert get_step_output(add_one_step_events, 'add_one.compute')
    assert store.has_intermediate(None, 'add_one.compute')
    assert store.get_intermediate(None, 'add_one.compute', Int) == 2
Example #2
class TestExecuteDagContainerizedS3Storage(object):
    handle = ExecutionTargetHandle.for_pipeline_fn(
        define_demo_execution_pipeline)
    pipeline_name = 'demo_pipeline'
    environment_yaml = [
        script_relative_path('test_project/env.yaml'),
        script_relative_path('test_project/env_s3.yaml'),
    ]
    run_id = str(uuid.uuid4())
    execution_date = datetime.datetime.utcnow()
    image = IMAGE

    # pylint: disable=redefined-outer-name
    def test_execute_dag_containerized(
            self, dagster_airflow_docker_operator_pipeline):
        for result in dagster_airflow_docker_operator_pipeline:
            assert 'data' in result
            assert 'executePlan' in result['data']
            assert '__typename' in result['data']['executePlan']
            assert result['data']['executePlan'][
                '__typename'] == 'ExecutePlanSuccess'
            result = list(
                filter(
                    lambda x: x['__typename'] == 'ExecutionStepOutputEvent',
                    result['data']['executePlan']['stepEvents'],
                ))[0]
            if result['step']['kind'] == 'INPUT_THUNK':
                continue
Example #3
class TestAirflowPython_1WarehouseExecution(object):
    handle = ExecutionTargetHandle.for_pipeline_fn(
        define_airline_demo_warehouse_pipeline)
    pipeline_name = 'airline_demo_warehouse_pipeline'
    environment_yaml = [
        script_relative_path(
            os.path.join('..', '..', 'dagster_examples', 'airline_demo',
                         'environments', 'local_base.yaml')),
        script_relative_path(
            os.path.join('..', '..', 'dagster_examples', 'airline_demo',
                         'environments', 'local_airflow.yaml')),
        script_relative_path(
            os.path.join(
                '..',
                '..',
                'dagster_examples',
                'airline_demo',
                'environments',
                'local_warehouse.yaml',
            )),
    ]
    mode = 'local'

    def test_airflow_run_warehouse_pipeline(
            self, dagster_airflow_python_operator_pipeline):
        pass
Example #4
def test_error_pipeline_multiprocess():
    result = execute_pipeline(
        ExecutionTargetHandle.for_pipeline_fn(define_error_pipeline).build_pipeline_definition(),
        environment_dict={'storage': {'filesystem': {}}, 'execution': {'multiprocess': {}}},
        instance=DagsterInstance.local_temp(),
    )
    assert not result.success
Example #5
def test_failing():
    run_id = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_fn(define_failing_pipeline)
    pipeline = define_failing_pipeline()
    env_config = {
        'solids': {
            'sum_solid': {
                'inputs': {
                    'num': script_relative_path('data/num.csv')
                }
            }
        }
    }
    selector = ExecutionSelector('csv_hello_world')
    pipeline_run = InMemoryPipelineRun(
        run_id,
        selector,
        env_config,
        mode='default',
        reexecution_config=None,
        step_keys_to_execute=None,
    )
    execution_manager = MultiprocessingExecutionManager()
    execution_manager.execute_pipeline(handle,
                                       pipeline,
                                       pipeline_run,
                                       raise_on_error=False)
    execution_manager.join()
    assert pipeline_run.status == PipelineRunStatus.FAILURE
    assert pipeline_run.all_logs()
Example #6
def test_execution_crash():
    run_id = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_fn(define_crashy_pipeline)
    pipeline = define_crashy_pipeline()
    env_config = {
        'solids': {'sum_solid': {'inputs': {'num': script_relative_path('data/num.csv')}}}
    }
    selector = ExecutionSelector('csv_hello_world')
    pipeline_run = InMemoryPipelineRun(
        run_id,
        selector,
        env_config,
        mode='default',
        reexecution_config=None,
        step_keys_to_execute=None,
    )
    execution_manager = MultiprocessingExecutionManager()
    execution_manager.execute_pipeline(handle, pipeline, pipeline_run, raise_on_error=False)
    execution_manager.join()
    assert pipeline_run.status == PipelineRunStatus.FAILURE
    last_log = pipeline_run.all_logs()[-1]
    print(last_log.message)
    assert last_log.message.startswith(
        'Exception: Pipeline execution process for {run_id} unexpectedly exited\n'.format(
            run_id=run_id
        )
    )
Example #7
def test_running():
    run_id = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_fn(define_passing_pipeline)
    pipeline = define_passing_pipeline()
    env_config = {
        'solids': {'sum_solid': {'inputs': {'num': script_relative_path('data/num.csv')}}}
    }
    selector = ExecutionSelector('csv_hello_world')
    pipeline_run = InMemoryPipelineRun(
        run_id,
        selector,
        env_config,
        mode='default',
        reexecution_config=None,
        step_keys_to_execute=None,
    )
    execution_manager = MultiprocessingExecutionManager()
    execution_manager.execute_pipeline(handle, pipeline, pipeline_run, raise_on_error=False)
    execution_manager.join()
    assert pipeline_run.status == PipelineRunStatus.SUCCESS
    events = pipeline_run.all_logs()
    assert events

    process_start_events = get_events_of_type(events, DagsterEventType.PIPELINE_PROCESS_START)
    assert len(process_start_events) == 1

    process_started_events = get_events_of_type(events, DagsterEventType.PIPELINE_PROCESS_STARTED)
    assert len(process_started_events) == 1
Example #8
def test_event_sink_serialization():
    event_records = []

    class TestEventSink(EventSink):
        def on_dagster_event(self, dagster_event):
            event_records.append(dagster_event)

        def on_log_message(self, log_message):
            event_records.append(log_message)

    @lambda_solid
    def no():
        raise Exception('no')

    @pipeline
    def fails():
        no()

    sink = TestEventSink()

    # basic success
    execute_pipeline(define_simple(), run_config=RunConfig(event_sink=sink))
    # basic failure
    execute_pipeline(
        fails,
        run_config=RunConfig(event_sink=sink),
        environment_dict={
            'execution': {
                'in_process': {
                    'config': {
                        'raise_on_error': False
                    }
                }
            }
        },
    )
    # multiproc
    execute_pipeline(
        ExecutionTargetHandle.for_pipeline_fn(
            define_simple).build_pipeline_definition(),
        run_config=RunConfig(event_sink=sink),
        environment_dict={
            'storage': {
                'filesystem': {}
            },
            'execution': {
                'multiprocess': {}
            }
        },
    )
    # kitchen sink
    execute_pipeline(many_events, run_config=RunConfig(event_sink=sink))

    for dagster_event in event_records:
        payload = dagster_event.to_json()
        clone = EventRecord.from_json(payload)
        assert clone == dagster_event
Example #9
def test_execute_on_dask():
    result = execute_on_dask(
        ExecutionTargetHandle.for_pipeline_fn(define_dask_test_pipeline),
        env_config={'storage': {
            'filesystem': {}
        }},
        dask_config=DaskConfig(timeout=30),
    )
    assert result.result_for_solid('simple').result_value() == 1
Example #10
def test_mem_storage_error_pipeline_multiprocess():
    result = execute_pipeline(
        ExecutionTargetHandle.for_pipeline_fn(define_diamond_pipeline).build_pipeline_definition(),
        environment_dict={'execution': {'multiprocess': {}}},
        instance=DagsterInstance.local_temp(),
        raise_on_error=False,
    )
    assert not result.success
    assert len(result.event_list) == 1
    assert result.event_list[0].is_failure
Example #11
def test_error_pipeline_multiprocess():
    pipeline = define_error_pipeline()
    result = execute_pipeline(
        pipeline,
        run_config=RunConfig(executor_config=MultiprocessExecutorConfig(
            ExecutionTargetHandle.for_pipeline_fn(define_error_pipeline))),
        environment_dict={'storage': {
            'filesystem': {}
        }},
    )
    assert not result.success
Example #12
def test_error_pipeline_multiprocess():
    pipeline = define_error_pipeline()
    result = execute_pipeline(
        pipeline,
        run_config=RunConfig(
            executor_config=MultiprocessExecutorConfig(
                ExecutionTargetHandle.for_pipeline_fn(define_error_pipeline)
            ),
            storage_mode=RunStorageMode.FILESYSTEM,
        ),
    )
    assert not result.success
Example #13
class TestAirflowPython_0IngestExecution:
    handle = ExecutionTargetHandle.for_pipeline_fn(define_airline_demo_ingest_pipeline)
    pipeline_name = 'airline_demo_ingest_pipeline'
    config_yaml = [
        script_relative_path(os.path.join('..', 'environments', 'local_base.yaml')),
        script_relative_path(os.path.join('..', 'environments', 'local_airflow.yaml')),
        script_relative_path(os.path.join('..', 'environments', 'local_fast_ingest.yaml')),
    ]
    mode = 'local'

    def test_airflow_run_ingest_pipeline(self, dagster_airflow_python_operator_pipeline):
        pass
Example #14
def test_execute_on_dask():
    '''This test is flaky on py27, I believe because of
    https://github.com/dask/distributed/issues/2446. For now, we just retry a couple times...
    '''
    result = execute_on_dask(
        ExecutionTargetHandle.for_pipeline_fn(define_dask_test_pipeline),
        env_config={'storage': {
            'filesystem': {}
        }},
        run_config=RunConfig(storage_mode=RunStorageMode.FILESYSTEM),
        dask_config=DaskConfig(timeout=30),
    )
    assert result.result_for_solid('simple').transformed_value() == 1
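
The docstring above mentions retrying the flaky call a couple of times, but the retry itself is not shown; the sketch below is one minimal way to do it, assuming a plain retry loop (the helper name run_with_retries and the attempt count of 3 are illustrative, not part of the original test).

def run_with_retries(fn, attempts=3):
    # Re-invoke fn up to `attempts` times, re-raising the last error if every attempt fails.
    for attempt in range(attempts):
        try:
            return fn()
        except Exception:
            if attempt == attempts - 1:
                raise


# Hypothetical usage: wrap the flaky dask execution from the example above.
# result = run_with_retries(
#     lambda: execute_on_dask(
#         ExecutionTargetHandle.for_pipeline_fn(define_dask_test_pipeline),
#         env_config={'storage': {'filesystem': {}}},
#         run_config=RunConfig(storage_mode=RunStorageMode.FILESYSTEM),
#         dask_config=DaskConfig(timeout=30),
#     )
# )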
Example #15
class TestAirflowizedEventPipeline(object):
    config_yaml = [
        script_relative_path(
            '../../dagster_examples/airline_demo/environments/default.yaml')
    ]
    handle = ExecutionTargetHandle.for_pipeline_fn(
        define_event_ingest_pipeline)
    pipeline_name = 'event_ingest_pipeline'

    # pylint: disable=redefined-outer-name
    def test_airflowized_event_pipeline(
            self, dagster_airflow_python_operator_pipeline):
        pass
Example #16
def test_invalid_instance():
    result = execute_pipeline(
        ExecutionTargetHandle.for_pipeline_fn(define_diamond_pipeline).build_pipeline_definition(),
        environment_dict={'storage': {'filesystem': {}}, 'execution': {'multiprocess': {}}},
        instance=DagsterInstance.ephemeral(),
        raise_on_error=False,
    )
    assert not result.success
    assert len(result.event_list) == 1
    assert result.event_list[0].is_failure
    assert (
        result.event_list[0].pipeline_init_failure_data.error.cls_name
        == 'DagsterUnmetExecutorRequirementsError'
    )
    assert 'non-ephemeral instance' in result.event_list[0].pipeline_init_failure_data.error.message
Example #17
def test_using_file_system_for_subplan_multiprocessing():

    environment_dict = {'storage': {'filesystem': {}}}
    instance = DagsterInstance.local_temp()

    execution_plan = create_execution_plan(
        ExecutionTargetHandle.for_pipeline_fn(
            define_inty_pipeline).build_pipeline_definition(),
        environment_dict=environment_dict,
    )

    assert execution_plan.get_step_by_key('return_one.compute')

    step_keys = ['return_one.compute']

    run_id = str(uuid.uuid4())
    instance.create_empty_run(run_id, execution_plan.pipeline_def.name)

    return_one_step_events = list(
        execute_plan(
            execution_plan,
            instance,
            environment_dict=dict(environment_dict,
                                  execution={'multiprocess': {}}),
            run_config=RunConfig(run_id=run_id),
            step_keys_to_execute=step_keys,
        ))

    store = build_fs_intermediate_store(instance.intermediates_directory,
                                        run_id)

    assert get_step_output(return_one_step_events, 'return_one.compute')
    assert store.has_intermediate(None, 'return_one.compute')
    assert store.get_intermediate(None, 'return_one.compute', Int).obj == 1

    add_one_step_events = list(
        execute_plan(
            execution_plan,
            instance,
            environment_dict=dict(environment_dict,
                                  execution={'multiprocess': {}}),
            run_config=RunConfig(run_id=run_id),
            step_keys_to_execute=['add_one.compute'],
        ))

    assert get_step_output(add_one_step_events, 'add_one.compute')
    assert store.has_intermediate(None, 'add_one.compute')
    assert store.get_intermediate(None, 'add_one.compute', Int).obj == 2
Example #18
def test_mem_storage_error_pipeline_multiprocess():
    with pytest.raises(DagsterInvariantViolationError) as exc_info:
        execute_pipeline(
            define_diamond_pipeline(),
            run_config=RunConfig(executor_config=MultiprocessExecutorConfig(
                ExecutionTargetHandle.for_pipeline_fn(define_error_pipeline))),
        )

    assert ('While invoking '
            'pipeline diamond_execution. You have attempted to use the '
            'multiprocessing executor while using system storage in_memory '
            'which does not persist intermediates. This means there would '
            'be no way to move data between different processes. Please '
            'configure your pipeline in the storage config section to use '
            'persistent system storage such as the filesystem.') in str(
                exc_info.value)
Example #19
def test_invalid_instance():
    result = execute_pipeline(
        ExecutionTargetHandle.for_pipeline_fn(
            define_diamond_pipeline).build_pipeline_definition(),
        environment_dict={
            'storage': {
                'filesystem': {}
            },
            'execution': {
                'multiprocess': {}
            }
        },
        instance=DagsterInstance.ephemeral(),
    )
    assert not result.success
    assert len(result.event_list) == 1
    assert result.event_list[0].is_failure
Example #20
def test_using_file_system_for_subplan_multiprocessing():

    environment_dict = {'storage': {'filesystem': {}}}
    instance = DagsterInstance.local_temp()

    pipeline_def = ExecutionTargetHandle.for_pipeline_fn(
        define_inty_pipeline
    ).build_pipeline_definition()

    execution_plan = create_execution_plan(pipeline_def, environment_dict=environment_dict)
    pipeline_run = instance.create_run_for_pipeline(
        pipeline=pipeline_def, execution_plan=execution_plan
    )

    assert execution_plan.get_step_by_key('return_one.compute')

    return_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(['return_one.compute']),
            instance,
            environment_dict=dict(environment_dict, execution={'multiprocess': {}}),
            pipeline_run=pipeline_run,
        )
    )

    store = build_fs_intermediate_store(instance.intermediates_directory, pipeline_run.run_id)

    assert get_step_output(return_one_step_events, 'return_one.compute')
    assert store.has_intermediate(None, 'return_one.compute')
    assert store.get_intermediate(None, 'return_one.compute', Int).obj == 1

    add_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(['add_one.compute']),
            instance,
            environment_dict=dict(environment_dict, execution={'multiprocess': {}}),
            pipeline_run=pipeline_run,
        )
    )

    assert get_step_output(add_one_step_events, 'add_one.compute')
    assert store.has_intermediate(None, 'add_one.compute')
    assert store.get_intermediate(None, 'add_one.compute', Int).obj == 2
Example #21
class TestExecuteDagPythonS3Storage(object):
    handle = ExecutionTargetHandle.for_pipeline_fn(
        define_demo_execution_pipeline)
    pipeline_name = 'demo_pipeline'
    environment_yaml = [
        script_relative_path('test_project/env.yaml'),
        script_relative_path('test_project/env_s3.yaml'),
    ]
    run_id = str(uuid.uuid4())
    execution_date = datetime.datetime.utcnow()

    # pylint: disable=redefined-outer-name
    def test_execute_dag(self, dagster_airflow_python_operator_pipeline):
        expected_results = {
            'multiply_the_word': '"barbar"',
            'count_letters': '{"b": 2, "a": 2, "r": 2}',
        }
        for result in dagster_airflow_python_operator_pipeline:
            assert 'data' in result
            assert 'executePlan' in result['data']
            assert '__typename' in result['data']['executePlan']
            assert result['data']['executePlan'][
                '__typename'] == 'ExecutePlanSuccess'
            result = list(
                filter(
                    lambda x: x['__typename'] == 'ExecutionStepOutputEvent',
                    result['data']['executePlan']['stepEvents'],
                ))[0]
            if result['step']['kind'] == 'INPUT_THUNK':
                continue
            # This ugly beast is to deal with cross-python-version differences in `valueRepr` --
            # in py2 we'll get 'u"barbar"', in py3 we'll get '"barbar"', etc.
            assert json.loads(
                # pylint: disable=anomalous-backslash-in-string
                re.sub(
                    r'\{u\'',
                    '{\'',
                    re.sub(' u\'', ' \'',
                           re.sub('^u\'', '\'', result['valueRepr'])),
                ).replace('\'', '"')) == json.loads(
                    expected_results[result['step']['solidHandleID']].replace(
                        '\'', '"'))
Example #22
class TestExecuteDagContainerizedFilesystemStorage(object):
    handle = ExecutionTargetHandle.for_pipeline_fn(
        define_demo_execution_pipeline)
    pipeline_name = 'demo_pipeline'
    environment_yaml = [
        script_relative_path('test_project/env.yaml'),
        script_relative_path('test_project/env_filesystem.yaml'),
    ]
    run_id = str(uuid.uuid4())
    execution_date = datetime.datetime.utcnow()
    op_kwargs = {'host_tmp_dir': '/tmp'}
    image = IMAGE

    # pylint: disable=redefined-outer-name
    def test_execute_dag_containerized(
            self, dagster_airflow_docker_operator_pipeline):
        expected_results = {
            'multiply_the_word': '"barbar"',
            'count_letters': '{"b": 2, "a": 2, "r": 2}',
        }
        for result in dagster_airflow_docker_operator_pipeline:
            assert 'data' in result
            assert 'executePlan' in result['data']
            assert '__typename' in result['data']['executePlan']
            assert result['data']['executePlan'][
                '__typename'] == 'ExecutePlanSuccess'
            result = list(
                filter(
                    lambda x: x['__typename'] == 'ExecutionStepOutputEvent',
                    result['data']['executePlan']['stepEvents'],
                ))[0]
            if result['step']['kind'] == 'INPUT_THUNK':
                continue
            assert json.loads(
                re.sub(
                    '{u\'', '{\'',
                    re.sub(' u\'', ' \'',
                           re.sub('^u\'', '\'', result['valueRepr']))).replace(
                               '\'', '"')) == json.loads(expected_results[
                                   result['step']['solidHandleID']].replace(
                                       '\'', '"'))
Example #23
def test_diamond_multi_execution():
    pipeline = define_diamond_pipeline()
    result = execute_pipeline(
        pipeline,
        environment_dict={'storage': {'filesystem': {}}},
        run_config=RunConfig(
            executor_config=MultiprocessExecutorConfig(
                ExecutionTargetHandle.for_pipeline_fn(define_diamond_pipeline)
            )
        ),
    )
    assert result.success

    assert result.result_for_solid('adder').result_value() == 11

    pids_by_solid = {}
    for solid in pipeline.solids:
        pids_by_solid[solid.name] = transform_event(result, solid.name).logging_tags['pid']

    # guarantee that all solids ran in their own process
    assert len(set(pids_by_solid.values())) == len(pipeline.solids)
Example #24
def test_diamond_multi_execution():
    pipeline = define_diamond_pipeline()
    result = execute_pipeline(
        pipeline,
        run_config=RunConfig(
            executor_config=MultiprocessExecutorConfig(
                ExecutionTargetHandle.for_pipeline_fn(define_diamond_pipeline)
            ),
            storage_mode=RunStorageMode.FILESYSTEM,
        ),
    )
    assert result.success

    # FIXME: be able to get this value
    # https://github.com/dagster-io/dagster/issues/953
    # assert result.result_for_solid('adder').transformed_value() == 11

    pids_by_solid = {}
    for solid in pipeline.solids:
        pids_by_solid[solid.name] = transform_event(result, solid.name).logging_tags['pid']

    # guarantee that all solids ran in their own process
    assert len(set(pids_by_solid.values())) == len(pipeline.solids)
Example #25
@solid(resources={'R2'})
def two(_):
    return 1


@solid(resources={'R1', 'R2', 'R3'})
def one_and_two_and_three(_):
    return 1


def define_resource_pipeline():
    return PipelineDefinition(
        name='resources for days',
        solids=[all_resources, one, two, one_and_two_and_three],
        mode_definitions=[ModeDefinition(resources=lots_of_resources)],
    )


if __name__ == '__main__':
    pipeline = define_resource_pipeline()
    result = execute_pipeline(
        pipeline,
        run_config=RunConfig(
            executor_config=MultiprocessExecutorConfig(
                ExecutionTargetHandle.for_pipeline_fn(
                    define_resource_pipeline)),
            storage_mode=RunStorageMode.FILESYSTEM,
        ),
    )
Example #26
def test_all_step_events():  # pylint: disable=too-many-locals
    handle = ExecutionTargetHandle.for_pipeline_fn(define_test_events_pipeline)
    pipeline = handle.build_pipeline_definition()
    mode = pipeline.get_default_mode_name()
    run_config = RunConfig(mode=mode)
    execution_plan = create_execution_plan(pipeline, {}, run_config=run_config)
    step_levels = execution_plan.topological_step_levels()

    unhandled_events = STEP_EVENTS.copy()

    # Exclude types that are not step events
    ignored_events = {
        'LogMessageEvent',
        'PipelineStartEvent',
        'PipelineSuccessEvent',
        'PipelineInitFailureEvent',
        'PipelineFailureEvent',
    }

    event_counts = defaultdict(int)

    for step_level in step_levels:
        for step in step_level:

            variables = {
                'executionParams': {
                    'selector': {
                        'name': pipeline.name
                    },
                    'environmentConfigData': {
                        'storage': {
                            'filesystem': {}
                        }
                    },
                    'mode': mode,
                    'executionMetadata': {
                        'runId': run_config.run_id
                    },
                    'stepKeys': [step.key],
                }
            }
            instance = DagsterInstance.ephemeral()
            res = execute_query(handle,
                                START_PIPELINE_EXECUTION_MUTATION,
                                variables,
                                instance=instance)

            # go through the same dict, decrement all the event records we've seen from the GraphQL
            # response
            if not res.get('errors'):
                run_logs = res['data']['startPipelineExecution']['run'][
                    'logs']['nodes']

                events = [
                    dagster_event_from_dict(e, pipeline.name) for e in run_logs
                    if e['__typename'] not in ignored_events
                ]

                for event in events:
                    if event.step_key:
                        key = event.step_key + '.' + event.event_type_value
                    else:
                        key = event.event_type_value
                    event_counts[key] -= 1
                unhandled_events -= {
                    DagsterEventType(e.event_type_value)
                    for e in events
                }
            else:
                raise Exception(res['errors'])

            # build up a dict, incrementing all the event records we've produced in the run storage
            logs = instance.all_logs(run_config.run_id)
            for log in logs:
                if not log.dagster_event or (
                        DagsterEventType(log.dagster_event.event_type_value)
                        not in STEP_EVENTS.union(
                            set([DagsterEventType.ENGINE_EVENT]))):
                    continue
                if log.dagster_event.step_key:
                    key = log.dagster_event.step_key + '.' + log.dagster_event.event_type_value
                else:
                    key = log.dagster_event.event_type_value
                event_counts[key] += 1

    # Ensure we've processed all the events that were generated in the run storage
    assert sum(event_counts.values()) == 0

    # Ensure we've handled the universe of event types
    assert not unhandled_events
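
The increment/decrement bookkeeping above is easier to see in isolation: every event seen in the GraphQL response should appear exactly once in run storage, so the two passes cancel out. A toy sketch of that invariant follows; the event keys are made up for illustration.

from collections import defaultdict

event_counts = defaultdict(int)

# Keys observed in the GraphQL response are decremented...
for key in ['return_one.compute.STEP_SUCCESS', 'add_one.compute.STEP_SUCCESS']:
    event_counts[key] -= 1

# ...and keys observed in the run storage are incremented.
for key in ['return_one.compute.STEP_SUCCESS', 'add_one.compute.STEP_SUCCESS']:
    event_counts[key] += 1

# If the two sources agree event-for-event, everything nets to zero.
assert sum(event_counts.values()) == 0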
Example #27
def test_all_step_events():  # pylint: disable=too-many-locals
    handle = ExecutionTargetHandle.for_pipeline_fn(define_test_events_pipeline)
    pipeline = handle.build_pipeline_definition()
    mode = pipeline.get_default_mode_name()
    execution_plan = create_execution_plan(pipeline, {}, mode=mode)
    step_levels = execution_plan.topological_step_levels()
    run_config = RunConfig(
        executor_config=InProcessExecutorConfig(raise_on_error=False),
        storage_mode=RunStorageMode.FILESYSTEM,
    )

    unhandled_events = STEP_EVENTS.copy()

    # Exclude types that are not step events
    ignored_events = {
        'LogMessageEvent',
        'PipelineStartEvent',
        'PipelineSuccessEvent',
        'PipelineInitFailureEvent',
        'PipelineFailureEvent',
    }

    step_event_fragment = get_step_event_fragment()
    log_message_event_fragment = get_log_message_event_fragment()
    query = '\n'.join(
        (
            PIPELINE_EXECUTION_QUERY_TEMPLATE.format(
                step_event_fragment=step_event_fragment.include_key,
                log_message_event_fragment=log_message_event_fragment.include_key,
            ),
            step_event_fragment.fragment,
            log_message_event_fragment.fragment,
        )
    )

    event_counts = defaultdict(int)

    for step_level in step_levels:
        for step in step_level:

            variables = {
                'executionParams': {
                    'selector': {'name': pipeline.name},
                    'environmentConfigData': {'storage': {'filesystem': {}}},
                    'mode': mode,
                    'executionMetadata': {'runId': run_config.run_id},
                    'stepKeys': [step.key],
                }
            }

            pipeline_run_storage = PipelineRunStorage()

            res = execute_query(handle, query, variables, pipeline_run_storage=pipeline_run_storage)

            # go through the same dict, decrement all the event records we've seen from the GraphQL
            # response
            if not res.get('errors'):
                run_logs = res['data']['startPipelineExecution']['run']['logs']['nodes']

                events = [
                    dagster_event_from_dict(e, pipeline.name)
                    for e in run_logs
                    if e['__typename'] not in ignored_events
                ]

                for event in events:
                    key = event.step_key + '.' + event.event_type_value
                    event_counts[key] -= 1
                unhandled_events -= {DagsterEventType(e.event_type_value) for e in events}

            # build up a dict, incrementing all the event records we've produced in the run storage
            logs = pipeline_run_storage.get_run_by_id(run_config.run_id).all_logs()
            for log in logs:
                if not log.dagster_event or (
                    DagsterEventType(log.dagster_event.event_type_value) not in STEP_EVENTS
                ):
                    continue
                key = log.dagster_event.step_key + '.' + log.dagster_event.event_type_value
                event_counts[key] += 1

    # Ensure we've processed all the events that were generated in the run storage
    assert sum(event_counts.values()) == 0

    # Ensure we've handled the universe of event types
    assert not unhandled_events
Example #28
def test_all_step_events():  # pylint: disable=too-many-locals
    handle = ExecutionTargetHandle.for_pipeline_fn(define_test_events_pipeline)
    pipeline = handle.build_pipeline_definition()
    mode = pipeline.get_default_mode_name()
    instance = DagsterInstance.ephemeral()
    execution_plan = create_execution_plan(pipeline, mode=mode)
    pipeline_run = instance.create_run_for_pipeline(
        pipeline=pipeline, execution_plan=execution_plan, mode=mode)
    step_levels = execution_plan.topological_step_levels()

    unhandled_events = STEP_EVENTS.copy()

    # Exclude types that are not step events
    ignored_events = {
        'LogMessageEvent',
        'PipelineStartEvent',
        'PipelineSuccessEvent',
        'PipelineInitFailureEvent',
        'PipelineFailureEvent',
    }

    event_counts = defaultdict(int)

    for step_level in step_levels:
        for step in step_level:
            variables = {
                'executionParams': {
                    'selector': {
                        'name': pipeline.name
                    },
                    'environmentConfigData': {
                        'storage': {
                            'filesystem': {}
                        }
                    },
                    'mode': mode,
                    'executionMetadata': {
                        'runId': pipeline_run.run_id
                    },
                    'stepKeys': [step.key],
                }
            }
            res = execute_query(handle,
                                EXECUTE_PLAN_MUTATION,
                                variables,
                                instance=instance)

            # go through the same dict, decrement all the event records we've seen from the GraphQL
            # response
            if not res.get('errors'):
                assert 'data' in res, res
                assert 'executePlan' in res['data'], res
                assert 'stepEvents' in res['data']['executePlan'], res
                step_events = res['data']['executePlan']['stepEvents']

                events = [
                    dagster_event_from_dict(e, pipeline.name)
                    for e in step_events
                    if e['__typename'] not in ignored_events
                ]

                for event in events:
                    if event.step_key:
                        key = event.step_key + '.' + event.event_type_value
                    else:
                        key = event.event_type_value
                    event_counts[key] -= 1
                unhandled_events -= {
                    DagsterEventType(e.event_type_value)
                    for e in events
                }
            else:
                raise Exception(res['errors'])

    # build up a dict, incrementing all the event records we've produced in the run storage
    logs = instance.all_logs(pipeline_run.run_id)
    for log in logs:
        if not log.dagster_event or (DagsterEventType(
                log.dagster_event.event_type_value) not in STEP_EVENTS.union(
                    set([DagsterEventType.ENGINE_EVENT]))):
            continue
        if log.dagster_event.step_key:
            key = log.dagster_event.step_key + '.' + log.dagster_event.event_type_value
        else:
            key = log.dagster_event.event_type_value
        event_counts[key] += 1

    # Ensure we've processed all the events that were generated in the run storage
    assert sum(event_counts.values()) == 0

    # Ensure we've handled the universe of event types
    # Why are these retry events not handled? Because right now there is no way to configure retries
    # on executePlan -- this needs to change, and we should separate the ExecutionParams that get
    # sent to executePlan from those that get sent to startPipelineExecution and friends
    assert unhandled_events == {
        DagsterEventType.STEP_UP_FOR_RETRY, DagsterEventType.STEP_RESTARTED
    }
Example #29
            SolidInstance('hammer', alias='hammer_2'): {
                'chase_duration': DependencyDefinition('giver', 'out_2')
            },
            SolidInstance('hammer', alias='hammer_3'): {
                'chase_duration': DependencyDefinition('giver', 'out_3')
            },
            SolidInstance('hammer', alias='hammer_4'): {
                'chase_duration': DependencyDefinition('giver', 'out_4')
            },
            SolidInstance('total'): {
                'in_1': DependencyDefinition('hammer_1', 'total'),
                'in_2': DependencyDefinition('hammer_2', 'total'),
                'in_3': DependencyDefinition('hammer_3', 'total'),
                'in_4': DependencyDefinition('hammer_4', 'total'),
            },
        },
        mode_definitions=[ModeDefinition()],
    )


if __name__ == '__main__':
    result = execute_on_dask(
        ExecutionTargetHandle.for_pipeline_fn(define_hammer_pipeline),
        env_config={'storage': {
            'filesystem': {}
        }},
        run_config=RunConfig(storage_mode=RunStorageMode.FILESYSTEM),
    )
    print('Total Hammer Time: ',
          result.result_for_solid('total').transformed_value())
Example #30
def test_exc_target_handle():
    res = ExecutionTargetHandle.for_pipeline_fn(define_pipeline)
    assert res.data.python_file == __file__
    assert res.data.fn_name == 'define_pipeline'