Example No. 1
def test_execute_execute_plan_mutation_raw():
    pipeline_name = 'sleepy_pipeline'
    pipeline = sleepy_recon_pipeline()
    instance = DagsterInstance.local_temp()

    workspace = load_sleepy_workspace(instance)

    pipeline_run = instance.create_run_for_pipeline(pipeline_def=pipeline.get_definition())
    variables = {
        'executionParams': {
            'runConfigData': {},
            'mode': 'default',
            'selector': {
                'repositoryLocationName': get_ephemeral_repository_name(pipeline_name),
                'repositoryName': get_ephemeral_repository_name(pipeline_name),
                'pipelineName': pipeline_name,
            },
            'executionMetadata': {'runId': pipeline_run.run_id},
        }
    }
    result = execute_execute_plan_mutation_raw(
        workspace, variables, instance_ref=instance.get_ref()
    )
    seen_events = set()
    for event in result:
        seen_events.add((event.dagster_event.event_type_value, event.step_key))

    assert seen_events == EXPECTED_EVENTS
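
Note: the EXPECTED_EVENTS constant asserted against above is defined elsewhere in the test module. From the way seen_events is built, it must be a set of (event_type_value, step_key) tuples; the sketch below only illustrates that shape with made-up event names and step keys, it is not the real constant.

# Hypothetical illustration of the shape of EXPECTED_EVENTS -- the actual
# values depend on the steps of sleepy_pipeline and are not shown here.
EXPECTED_EVENTS = {
    ("STEP_START", "sleeper"),
    ("STEP_OUTPUT", "sleeper"),
    ("STEP_SUCCESS", "sleeper"),
}
seen_events = {
    ("STEP_START", "sleeper"),
    ("STEP_OUTPUT", "sleeper"),
    ("STEP_SUCCESS", "sleeper"),
}
# Set equality catches both missing and unexpected events in a single assertion.
assert seen_events == EXPECTED_EVENTS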
Example No. 2
def test_execute_execute_plan_mutation_out_of_process_fails():
    pipeline_name = "sleepy_pipeline"
    instance = DagsterInstance.local_temp()

    pipeline = sleepy_recon_pipeline()
    workspace = load_sleepy_workspace(instance)
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline.get_definition())
    variables = {
        "executionParams": {
            "runConfigData": {},
            "mode": "default",
            "selector": {
                "repositoryLocationName":
                get_ephemeral_repository_name(pipeline_name),
                "repositoryName":
                get_ephemeral_repository_name(pipeline_name),
                "pipelineName":
                pipeline_name,
            },
            "executionMetadata": {
                "runId": pipeline_run.run_id
            },
        }
    }
    with pytest.raises(
            DagsterGraphQLClientError,
            match=re.escape(
                "execute_plan is not supported for out-of-process repository locations"
            ),
    ):
        execute_execute_plan_mutation(workspace,
                                      variables,
                                      instance_ref=instance.get_ref())
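
Note: the match=re.escape(...) idiom above is worth spelling out. pytest.raises treats the match argument as a regular expression that is searched against the string form of the raised exception, so a literal error message containing regex metacharacters has to be escaped first. The following self-contained sketch uses a made-up exception and message to show the same pattern:

import re

import pytest


def fail():
    # The message contains regex metacharacters ("(" and ")"), so it must be
    # escaped before being used as a match pattern.
    raise ValueError("bad value (expected int)")


def test_fail_message():
    with pytest.raises(ValueError, match=re.escape("bad value (expected int)")):
        fail()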
Example No. 3
def test_execute_execute_plan_mutation():
    pipeline_name = "sleepy_pipeline"
    instance = DagsterInstance.local_temp()

    pipeline = sleepy_recon_pipeline()
    workspace = load_sleepy_workspace(instance)
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline.get_definition())
    variables = {
        "executionParams": {
            "runConfigData": {},
            "mode": "default",
            "selector": {
                "repositoryLocationName":
                get_ephemeral_repository_name(pipeline_name),
                "repositoryName":
                get_ephemeral_repository_name(pipeline_name),
                "pipelineName":
                pipeline_name,
            },
            "executionMetadata": {
                "runId": pipeline_run.run_id
            },
        }
    }
    result = execute_execute_plan_mutation(workspace,
                                           variables,
                                           instance_ref=instance.get_ref())
    seen_events = set()
    for event in result:
        seen_events.add((event.event_type_value, event.step_key))

    assert seen_events == EXPECTED_EVENTS
Example No. 4
def test_dagster_telemetry_unset(caplog):
    with seven.TemporaryDirectory() as temp_dir:
        with instance_for_test_tempdir(temp_dir):
            runner = CliRunner(env={"DAGSTER_HOME": temp_dir})
            with pushd(path_to_file("")):
                pipeline_attribute = "foo_pipeline"
                pipeline_name = "foo"
                result = runner.invoke(
                    pipeline_execute_command,
                    [
                        "-f",
                        path_to_file("test_cli_commands.py"), "-a",
                        pipeline_attribute
                    ],
                )

                for record in caplog.records:
                    message = json.loads(record.getMessage())
                    if message.get("action") == UPDATE_REPO_STATS:
                        assert message.get("pipeline_name_hash") == hash_name(
                            pipeline_name)
                        assert message.get("num_pipelines_in_repo") == str(1)
                        assert message.get("repo_hash") == hash_name(
                            get_ephemeral_repository_name(pipeline_name))
                    assert set(message.keys()) == EXPECTED_KEYS

                assert len(caplog.records) == 5
                assert result.exit_code == 0
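
Note: these telemetry tests never assert on raw pipeline or repository names, only on hash_name(...) of them, because the telemetry log is anonymized. The implementation of hash_name is not shown in these examples; the sketch below assumes a SHA-256 hex digest purely to illustrate why the test can recompute the hash deterministically -- the real function may differ.

import hashlib


def hash_name(name):
    # Assumed implementation: a stable one-way hash of the name, so the test
    # can derive the same value from pipeline_name that the telemetry code
    # derived when it logged the event.
    return hashlib.sha256(name.encode("utf-8")).hexdigest()


assert hash_name("foo") == hash_name("foo")  # deterministic
assert hash_name("foo") != hash_name("bar")  # distinct names stay distinct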
Example No. 5
def test_dagster_telemetry_enabled(caplog):
    with instance_for_test(overrides={"telemetry": {"enabled": True}}):
        runner = CliRunner()
        with pushd(path_to_file("")):
            pipeline_attribute = "foo_pipeline"
            pipeline_name = "foo"
            result = runner.invoke(
                pipeline_execute_command,
                [
                    "-f",
                    path_to_file("test_cli_commands.py"),
                    "-a",
                    pipeline_attribute,
                ],
            )

            for record in caplog.records:
                message = json.loads(record.getMessage())
                if message.get("action") == UPDATE_REPO_STATS:
                    assert message.get("pipeline_name_hash") == hash_name(
                        pipeline_name)
                    assert message.get("num_pipelines_in_repo") == str(1)
                    assert message.get("repo_hash") == hash_name(
                        get_ephemeral_repository_name(pipeline_name))
                assert set(message.keys()) == EXPECTED_KEYS
            assert len(caplog.records) == 5
            assert result.exit_code == 0
Example No. 6
def test_dagster_telemetry_unset(caplog):
    with seven.TemporaryDirectory() as temp_dir:
        with environ({'DAGSTER_HOME': temp_dir}):
            with open(os.path.join(temp_dir, 'dagster.yaml'), 'w') as fd:
                yaml.dump({}, fd, default_flow_style=False)

            DagsterInstance.local_temp(temp_dir)
            runner = CliRunner(env={'DAGSTER_HOME': temp_dir})
            with pushd(path_to_file('')):
                pipeline_attribute = 'foo_pipeline'
                pipeline_name = 'foo'
                result = runner.invoke(
                    pipeline_execute_command,
                    [
                        '-f',
                        path_to_file('test_cli_commands.py'),
                        '-a',
                        pipeline_attribute,
                    ],
                )

                for record in caplog.records:
                    message = json.loads(record.getMessage())
                    if message.get('action') == UPDATE_REPO_STATS:
                        assert message.get('pipeline_name_hash') == hash_name(
                            pipeline_name)
                        assert message.get('num_pipelines_in_repo') == str(1)
                        assert message.get('repo_hash') == hash_name(
                            get_ephemeral_repository_name(pipeline_name))
                    assert set(message.keys()) == EXPECTED_KEYS

                assert len(caplog.records) == 5
                assert result.exit_code == 0
Example No. 7
def log_repo_stats(instance, source, pipeline=None, repo=None):
    check.inst_param(instance, "instance", DagsterInstance)
    check.str_param(source, "source")
    check.opt_inst_param(pipeline, "pipeline", IPipeline)
    check.opt_inst_param(repo, "repo", ReconstructableRepository)

    if _get_instance_telemetry_enabled(instance):
        instance_id = _get_or_set_instance_id()

        if isinstance(pipeline, ReconstructablePipeline):
            pipeline_name_hash = hash_name(pipeline.get_definition().name)
            repository = pipeline.get_reconstructable_repository().get_definition()
            repo_hash = hash_name(repository.name)
            num_pipelines_in_repo = len(repository.pipeline_names)
            num_schedules_in_repo = len(repository.schedule_defs)
            num_sensors_in_repo = len(repository.sensor_defs)
        elif isinstance(repo, ReconstructableRepository):
            pipeline_name_hash = ""
            repository = repo.get_definition()
            repo_hash = hash_name(repository.name)
            num_pipelines_in_repo = len(repository.pipeline_names)
            num_schedules_in_repo = len(repository.schedule_defs)
            num_sensors_in_repo = len(repository.sensor_defs)
        else:
            pipeline_name_hash = hash_name(pipeline.get_definition().name)
            repo_hash = hash_name(get_ephemeral_repository_name(pipeline.get_definition().name))
            num_pipelines_in_repo = 1
            num_schedules_in_repo = 0
            num_sensors_in_repo = 0

        write_telemetry_log_line(
            TelemetryEntry(
                action=UPDATE_REPO_STATS,
                client_time=str(datetime.datetime.now()),
                event_id=str(uuid.uuid4()),
                instance_id=instance_id,
                metadata={
                    "source": source,
                    "pipeline_name_hash": pipeline_name_hash,
                    "num_pipelines_in_repo": str(num_pipelines_in_repo),
                    "num_schedules_in_repo": str(num_schedules_in_repo),
                    "num_sensors_in_repo": str(num_sensors_in_repo),
                    "repo_hash": repo_hash,
                },
            )._asdict()
        )
Example No. 8
def log_repo_stats(instance, source, pipeline=None, repo=None):
    check.inst_param(instance, 'instance', DagsterInstance)
    check.str_param(source, 'source')
    check.opt_inst_param(pipeline, 'pipeline', ExecutablePipeline)
    check.opt_inst_param(repo, 'repo', ReconstructableRepository)

    if _get_instance_telemetry_enabled(instance):
        instance_id = _get_or_set_instance_id()

        if isinstance(pipeline, ReconstructablePipeline):
            pipeline_name_hash = hash_name(pipeline.get_definition().name)
            repository = pipeline.get_reconstructable_repository().get_definition()
            repo_hash = hash_name(repository.name)
            num_pipelines_in_repo = len(repository.pipeline_names)
        elif isinstance(repo, ReconstructableRepository):
            pipeline_name_hash = ''
            repository = repo.get_definition()
            repo_hash = hash_name(repository.name)
            num_pipelines_in_repo = len(repository.pipeline_names)
        else:
            pipeline_name_hash = hash_name(pipeline.get_definition().name)
            repo_hash = hash_name(
                get_ephemeral_repository_name(pipeline.get_definition().name))
            num_pipelines_in_repo = 1

        write_telemetry_log_line(
            TelemetryEntry(
                action=UPDATE_REPO_STATS,
                client_time=str(datetime.datetime.now()),
                event_id=str(uuid.uuid4()),
                instance_id=instance_id,
                pipeline_name_hash=pipeline_name_hash,
                num_pipelines_in_repo=str(num_pipelines_in_repo),
                repo_hash=repo_hash,
                metadata={
                    'source': source
                },
            )._asdict())
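
Note: both versions of log_repo_stats end by calling ._asdict() on the constructed TelemetryEntry, which implies a namedtuple-style record. The reduced stand-in below (fields trimmed, values made up) only shows why ._asdict() produces the dict that write_telemetry_log_line serializes; it is not the real TelemetryEntry definition.

from collections import namedtuple

# Illustrative stand-in for dagster's TelemetryEntry; the real class carries
# more fields and lives in the telemetry module.
TelemetryEntry = namedtuple(
    "TelemetryEntry",
    ["action", "client_time", "event_id", "instance_id", "metadata"],
)

entry = TelemetryEntry(
    action="update_repo_stats",  # placeholder for the UPDATE_REPO_STATS constant
    client_time="2021-01-01 00:00:00",
    event_id="1234",
    instance_id="abcd",
    metadata={"source": "cli"},
)

# _asdict() converts the namedtuple into a plain dict, which is what gets
# written out as a telemetry log line.
print(entry._asdict())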
Example No. 9
def test_dagster_telemetry_enabled(caplog):
    with seven.TemporaryDirectory() as temp_dir:
        with environ({"DAGSTER_HOME": temp_dir}):
            with open(os.path.join(temp_dir, "dagster.yaml"), "w") as fd:
                yaml.dump({"telemetry": {
                    "enabled": True
                }},
                          fd,
                          default_flow_style=False)

            DagsterInstance.local_temp(temp_dir)
            runner = CliRunner(env={"DAGSTER_HOME": temp_dir})
            with pushd(path_to_file("")):
                pipeline_attribute = "foo_pipeline"
                pipeline_name = "foo"
                result = runner.invoke(
                    pipeline_execute_command,
                    [
                        "-f",
                        path_to_file("test_cli_commands.py"),
                        "-a",
                        pipeline_attribute,
                    ],
                )

                for record in caplog.records:
                    message = json.loads(record.getMessage())
                    if message.get("action") == UPDATE_REPO_STATS:
                        assert message.get("pipeline_name_hash") == hash_name(
                            pipeline_name)
                        assert message.get("num_pipelines_in_repo") == str(1)
                        assert message.get("repo_hash") == hash_name(
                            get_ephemeral_repository_name(pipeline_name))
                    assert set(message.keys()) == EXPECTED_KEYS
                assert len(caplog.records) == 5
                assert result.exit_code == 0
Example No. 10
def test_all_step_events():  # pylint: disable=too-many-locals
    instance = DagsterInstance.ephemeral()

    workspace = workspace_from_load_target(
        PythonFileTarget(__file__, define_test_events_pipeline.__name__),
        instance,
    )
    pipeline_def = define_test_events_pipeline()
    mode = pipeline_def.get_default_mode_name()
    execution_plan = create_execution_plan(pipeline_def, mode=mode)
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline_def, execution_plan=execution_plan, mode=mode)
    step_levels = execution_plan.topological_step_levels()

    unhandled_events = STEP_EVENTS.copy()

    # Exclude types that are not step events
    ignored_events = {
        'LogMessageEvent',
        'PipelineStartEvent',
        'PipelineSuccessEvent',
        'PipelineInitFailureEvent',
        'PipelineFailureEvent',
    }

    event_counts = defaultdict(int)

    for step_level in step_levels:
        for step in step_level:

            variables = {
                'executionParams': {
                    'selector': {
                        'repositoryLocationName': get_ephemeral_repository_name(pipeline_def.name),
                        'repositoryName': get_ephemeral_repository_name(pipeline_def.name),
                        'pipelineName': pipeline_def.name,
                    },
                    'runConfigData': {'storage': {'filesystem': {}}},
                    'mode': mode,
                    'executionMetadata': {'runId': pipeline_run.run_id},
                    'stepKeys': [step.key],
                },
            }
            res = execute_query(
                workspace,
                EXECUTE_PLAN_MUTATION,
                variables,
                instance=instance,
            )

            # go through the same dict, decrement all the event records we've seen from the GraphQL
            # response
            if not res.get('errors'):
                assert 'data' in res, res
                assert 'executePlan' in res['data'], res
                assert 'stepEvents' in res['data']['executePlan'], res
                step_events = res['data']['executePlan']['stepEvents']

                events = [
                    dagster_event_from_dict(e, pipeline_def.name)
                    for e in step_events
                    if e['__typename'] not in ignored_events
                ]

                for event in events:
                    if event.step_key:
                        key = event.step_key + '.' + event.event_type_value
                    else:
                        key = event.event_type_value
                    event_counts[key] -= 1
                unhandled_events -= {
                    DagsterEventType(e.event_type_value)
                    for e in events
                }
            else:
                raise Exception(res['errors'])

    # build up a dict, incrementing all the event records we've produced in the run storage
    logs = instance.all_logs(pipeline_run.run_id)
    for log in logs:
        if not log.dagster_event or (
            DagsterEventType(log.dagster_event.event_type_value)
            not in STEP_EVENTS.union({DagsterEventType.ENGINE_EVENT})
        ):
            continue
        if log.dagster_event.step_key:
            key = log.dagster_event.step_key + '.' + log.dagster_event.event_type_value
        else:
            key = log.dagster_event.event_type_value
        event_counts[key] += 1

    # Ensure we've processed all the events that were generated in the run storage
    assert sum(event_counts.values()) == 0

    # Ensure we've handled the universe of event types
    # Why are these retry events not handled? Because right now there is no way to configure retries
    # on executePlan -- this needs to change, and we should separate the ExecutionParams that get
    # sent to executePlan from those that get sent to startPipelineExecution and friends
    assert unhandled_events == {
        DagsterEventType.STEP_UP_FOR_RETRY, DagsterEventType.STEP_RESTARTED
    }
Example No. 11
def test_all_step_events():  # pylint: disable=too-many-locals
    instance = DagsterInstance.ephemeral()

    pipeline_def = define_test_events_pipeline()

    workspace = create_in_process_ephemeral_workspace(
        pointer=CodePointer.from_python_file(
            __file__,
            define_test_events_pipeline.__name__,
            working_directory=None))

    mode = pipeline_def.get_default_mode_name()
    execution_plan = create_execution_plan(pipeline_def, mode=mode)
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline_def, execution_plan=execution_plan, mode=mode)
    step_levels = execution_plan.topological_step_levels()

    unhandled_events = STEP_EVENTS.copy()

    # Exclude types that are not step events
    ignored_events = {
        "LogMessageEvent",
        "PipelineStartEvent",
        "PipelineSuccessEvent",
        "PipelineInitFailureEvent",
        "PipelineFailureEvent",
    }

    event_counts = defaultdict(int)

    for step_level in step_levels:
        for step in step_level:

            variables = {
                "executionParams": {
                    "selector": {
                        "repositoryLocationName":
                        IN_PROCESS_NAME,
                        "repositoryName":
                        get_ephemeral_repository_name(pipeline_def.name),
                        "pipelineName":
                        pipeline_def.name,
                    },
                    "runConfigData": {
                        "storage": {
                            "filesystem": {}
                        }
                    },
                    "mode": mode,
                    "executionMetadata": {
                        "runId": pipeline_run.run_id
                    },
                    "stepKeys": [step.key],
                },
            }
            res = execute_query(
                workspace,
                EXECUTE_PLAN_MUTATION,
                variables,
                instance=instance,
            )

            # go through the same dict, decrement all the event records we've seen from the GraphQL
            # response
            if not res.get("errors"):
                assert "data" in res, res
                assert "executePlan" in res["data"], res
                assert "stepEvents" in res["data"]["executePlan"], res
                step_events = res["data"]["executePlan"]["stepEvents"]

                events = [
                    dagster_event_from_dict(e, pipeline_def.name)
                    for e in step_events
                    if e["__typename"] not in ignored_events
                ]

                for event in events:
                    if event.step_key:
                        key = event.step_key + "." + event.event_type_value
                    else:
                        key = event.event_type_value
                    event_counts[key] -= 1
                unhandled_events -= {
                    DagsterEventType(e.event_type_value)
                    for e in events
                }
            else:
                raise Exception(res["errors"])

    # build up a dict, incrementing all the event records we've produced in the run storage
    logs = instance.all_logs(pipeline_run.run_id)
    for log in logs:
        if not log.dagster_event or (
            DagsterEventType(log.dagster_event.event_type_value)
            not in STEP_EVENTS.union({DagsterEventType.ENGINE_EVENT})
        ):
            continue
        if log.dagster_event.step_key:
            key = log.dagster_event.step_key + "." + log.dagster_event.event_type_value
        else:
            key = log.dagster_event.event_type_value
        event_counts[key] += 1

    # Ensure we've processed all the events that were generated in the run storage
    assert sum(event_counts.values()) == 0

    # Ensure we've handled the universe of event types
    # Why are these retry events not handled? Because right now there is no way to configure retries
    # on executePlan -- this needs to change, and we should separate the ExecutionParams that get
    # sent to executePlan from those that get sent to startPipelineExecution and friends
    assert unhandled_events == {
        DagsterEventType.STEP_UP_FOR_RETRY, DagsterEventType.STEP_RESTARTED
    }
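
Note: the heart of both test_all_step_events variants is a two-pass reconciliation: decrement a counter for every step event returned by the executePlan mutation, increment it for every matching event found in run storage, and require that everything nets out to zero. The snippet below reproduces that bookkeeping on hypothetical event lists so the pattern is visible without the GraphQL machinery:

from collections import defaultdict

event_counts = defaultdict(int)

# Hypothetical stand-ins for the two event streams being reconciled.
graphql_events = [("step_one", "STEP_START"), ("step_one", "STEP_SUCCESS")]
storage_events = [("step_one", "STEP_START"), ("step_one", "STEP_SUCCESS")]

# Decrement once per event seen in the GraphQL response...
for step_key, event_type in graphql_events:
    event_counts[step_key + "." + event_type] -= 1

# ...and increment once per event found in run storage.
for step_key, event_type in storage_events:
    event_counts[step_key + "." + event_type] += 1

# If the two streams agree event-for-event, every counter nets out to zero.
assert sum(event_counts.values()) == 0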