Example #1
    def make_python_callable(cls, dag_id, pipeline, env_config, step_keys):
        try:
            from dagster import RepositoryDefinition
            from dagster.cli.dynamic_loader import RepositoryContainer
            from dagster_graphql.cli import execute_query_from_cli
        except ImportError:
            raise AirflowException(
                'To use the DagsterPythonOperator, dagster and dagster_graphql must be installed '
                'in your Airflow environment.')
        # Wrap the single pipeline in an ephemeral repository keyed by the dag_id,
        # so the GraphQL layer can resolve it by name.
        repository = RepositoryDefinition('<<ephemeral repository>>',
                                          {dag_id: lambda: pipeline})
        repository_container = RepositoryContainer(repository=repository)

        def python_callable(**kwargs):
            # Reuse the Airflow dag_run's run_id as the Dagster run_id.
            run_id = kwargs.get('dag_run').run_id
            query = QUERY_TEMPLATE.format(
                config=env_config,
                run_id=run_id,
                step_keys=json.dumps(step_keys),
                pipeline_name=pipeline.name,
            )
            res = json.loads(
                execute_query_from_cli(repository_container,
                                       query,
                                       variables=None))
            cls.handle_errors(res, None)
            return cls.handle_result(res)

        return python_callable
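
The callable built above is meant to be attached to an Airflow task. A minimal wiring sketch, assuming the enclosing class is the DagsterPythonOperator named in the ImportError message and that dag, pipeline, env_config and step_keys are already defined in the surrounding DAG file (Airflow 1.x PythonOperator API):

from airflow.operators.python_operator import PythonOperator

# Illustration only: dag, pipeline, env_config and step_keys are assumed
# to exist in the surrounding DAG definition.
python_callable = DagsterPythonOperator.make_python_callable(
    dag_id=dag.dag_id,
    pipeline=pipeline,
    env_config=env_config,
    step_keys=step_keys,
)

task = PythonOperator(
    task_id='dagster_pipeline',
    python_callable=python_callable,
    provide_context=True,  # makes kwargs['dag_run'] available to the callable
    dag=dag,
)
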
Example #2
def define_context(raise_on_error=True):
    return DagsterGraphQLContext(
        RepositoryContainer(repository=define_repository()),
        PipelineRunStorage(),
        execution_manager=SynchronousExecutionManager(),
        raise_on_error=raise_on_error,
    )
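
A minimal usage sketch for the context factory above, assuming the execute_dagster_graphql test helper that appears in the later examples:

context = define_context()
# Run a query against the in-memory repository through the GraphQL layer.
result = execute_dagster_graphql(context, '{ pipelines { nodes { name } } }')
assert result.data['pipelines']['nodes']
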
Example #3
def test_smoke_app():
    repository_container = RepositoryContainer(repository=define_repo())
    pipeline_run_storage = PipelineRunStorage()
    flask_app = app.create_app(repository_container, pipeline_run_storage)
    client = flask_app.test_client()

    result = client.post('/graphql', data={'query': 'query { pipelines { nodes { name }}}'})
    data = json.loads(result.data.decode('utf-8'))
    assert len(data['data']['pipelines']['nodes']) == 1
    assert {node_data['name'] for node_data in data['data']['pipelines']['nodes']} == set(
        ['repo_demo_pipeline']
    )

    result = client.get('/graphql')
    assert result.status_code == 400
    data = json.loads(result.data.decode('utf-8'))
    assert len(data['errors']) == 1
    assert data['errors'][0]['message'] == 'Must provide query string.'

    result = client.get('/dagit/notebook?path=foo.bar')
    assert result.status_code == 400
    assert result.data.decode('utf-8') == 'Invalid Path'

    result = client.post('/graphql', data={'query': 'query { version { slkjd } }'})
    data = json.loads(result.data.decode('utf-8'))
    assert 'errors' in data
    assert len(data['errors']) == 1
    assert 'must not have a sub selection' in data['errors'][0]['message']

    result = client.get('static/foo/bar')
    assert result.status_code == 404
Example #4
def ui(variables, query, **kwargs):
    repository_target_info = load_target_info_from_cli_args(kwargs)

    repository_container = RepositoryContainer(repository_target_info)

    query = query.strip('\'" \n\t')

    execute_query_from_cli(repository_container, query, variables)
Example #5
def test_pipelines_python_error():
    ctx = DagsterGraphQLContext(
        RepositoryContainer(repository=define_error_pipeline_repo()),
        PipelineRunStorage(),
        execution_manager=SynchronousExecutionManager(),
    )
    result = execute_dagster_graphql(ctx, PIPELINES)
    assert result.data['pipelinesOrError']['__typename'] == "PythonError"
Example #6
def ui(host, port, sync, log, log_dir, no_watch=False, **kwargs):
    repository_target_info = load_target_info_from_cli_args(kwargs)

    # add the path for the cwd so imports in dynamically loaded code work correctly
    sys.path.append(os.getcwd())
    repository_container = RepositoryContainer(repository_target_info)

    check.invariant(
        not no_watch,
        'Do not set no_watch when calling the Dagit Python CLI directly -- this flag is a no-op '
        'at this level and should be set only when invoking dagit/bin/dagit.',
    )
    host_dagit_ui(log, log_dir, repository_container, sync, host, port)
Example #7
def test_pipelines_or_error_invalid():
    repository = RepositoryDefinition(
        name='test',
        pipeline_dict={'pipeline': define_circular_dependency_pipeline})
    context = DagsterGraphQLContext(
        RepositoryContainer(repository=repository),
        PipelineRunStorage(),
        execution_manager=SynchronousExecutionManager(),
    )
    result = execute_dagster_graphql(
        context,
        '{ pipelinesOrError { ... on InvalidDefinitionError { message } } }')
    msg = result.data['pipelinesOrError']['message']
    assert "Circular reference detected in solid csolid" in msg
Example #8
def test_running():
    run_id = make_new_run_id()
    repository_container = RepositoryContainer(
        RepositoryTargetInfo(
            repository_yaml=None,
            python_file=__file__,
            fn_name='define_passing_pipeline',
            module_name=None,
        ))
    pipeline = define_passing_pipeline()
    env_config = {
        'solids': {
            'sum_solid': {
                'inputs': {
                    'num': {
                        'csv': {
                            'path': script_relative_path('num.csv')
                        }
                    }
                }
            }
        }
    }
    selector = ExecutionSelector('pandas_hello_world')
    pipeline_run = InMemoryPipelineRun(
        run_id,
        selector,
        env_config,
        create_execution_plan(pipeline, env_config),
        reexecution_config=None,
        step_keys_to_execute=None,
    )
    execution_manager = MultiprocessingExecutionManager()
    execution_manager.execute_pipeline(repository_container,
                                       pipeline,
                                       pipeline_run,
                                       raise_on_error=False)
    execution_manager.join()
    assert pipeline_run.status == PipelineRunStatus.SUCCESS
    events = pipeline_run.all_logs()
    assert events

    process_start_events = get_events_of_type(
        events, DagsterEventType.PIPELINE_PROCESS_START)
    assert len(process_start_events) == 1

    process_started_events = get_events_of_type(
        events, DagsterEventType.PIPELINE_PROCESS_STARTED)
    assert len(process_started_events) == 1
Example #9
def execute_pipeline_through_queue(
    repository_info,
    pipeline_name,
    solid_subset,
    environment_dict,
    run_id,
    message_queue,
    reexecution_config,
    step_keys_to_execute,
):
    """
    Execute pipeline using message queue as a transport
    """

    message_queue.put(ProcessStartedSentinel(os.getpid()))

    run_config = RunConfig(
        run_id,
        event_callback=message_queue.put,
        executor_config=InProcessExecutorConfig(raise_on_error=False),
        reexecution_config=reexecution_config,
        step_keys_to_execute=step_keys_to_execute,
    )

    repository_container = RepositoryContainer(repository_info)
    # If the repository failed to load, report the error over the queue
    # instead of raising inside the child process.
    if repository_container.repo_error:
        message_queue.put(
            MultiprocessingError(
                serializable_error_info_from_exc_info(
                    repository_container.repo_error)))
        return

    try:
        result = execute_pipeline(
            repository_container.repository.get_pipeline(
                pipeline_name).build_sub_pipeline(solid_subset),
            environment_dict,
            run_config=run_config,
        )
        return result
    except:  # pylint: disable=W0702
        error_info = serializable_error_info_from_exc_info(sys.exc_info())
        message_queue.put(MultiprocessingError(error_info))
    finally:
        message_queue.put(MultiprocessingDone())
        message_queue.close()
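
A sketch of how a parent process might drive execute_pipeline_through_queue, spawning it in a child process and draining messages until the MultiprocessingDone sentinel arrives. This driver is an illustration, not the library's own execution manager; it assumes MultiprocessingDone comes from the same module as the worker above.

import multiprocessing

def run_pipeline_in_subprocess(repository_info, pipeline_name, environment_dict, run_id):
    message_queue = multiprocessing.Queue()
    proc = multiprocessing.Process(
        target=execute_pipeline_through_queue,
        args=(
            repository_info,
            pipeline_name,
            None,  # solid_subset (assumed here to mean the whole pipeline)
            environment_dict,
            run_id,
            message_queue,
            None,  # reexecution_config
            None,  # step_keys_to_execute
        ),
    )
    proc.start()

    # Collect events until the worker signals completion.
    events = []
    while True:
        message = message_queue.get()
        if isinstance(message, MultiprocessingDone):
            break
        events.append(message)
    proc.join()
    return events
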
Example #10
def test_execution_crash():
    run_id = make_new_run_id()
    repository_container = RepositoryContainer(
        RepositoryTargetInfo(
            repository_yaml=None,
            python_file=__file__,
            fn_name='define_crashy_pipeline',
            module_name=None,
        ))
    pipeline = define_crashy_pipeline()
    env_config = {
        'solids': {
            'sum_solid': {
                'inputs': {
                    'num': {
                        'csv': {
                            'path': script_relative_path('num.csv')
                        }
                    }
                }
            }
        }
    }
    selector = ExecutionSelector('pandas_hello_world')
    pipeline_run = InMemoryPipelineRun(
        run_id,
        selector,
        env_config,
        create_execution_plan(pipeline, env_config),
        reexecution_config=None,
        step_keys_to_execute=None,
    )
    execution_manager = MultiprocessingExecutionManager()
    execution_manager.execute_pipeline(repository_container,
                                       pipeline,
                                       pipeline_run,
                                       raise_on_error=False)
    execution_manager.join()
    assert pipeline_run.status == PipelineRunStatus.FAILURE
    last_log = pipeline_run.all_logs()[-1]
    print(last_log.message)
    assert last_log.message.startswith(
        'Exception: Pipeline execution process for {run_id} unexpectedly exited\n'
        .format(run_id=run_id))
Example #11
def test_failing():
    run_id = make_new_run_id()
    repository_container = RepositoryContainer(
        RepositoryTargetInfo(
            repository_yaml=None,
            python_file=__file__,
            fn_name='define_failing_pipeline',
            module_name=None,
        ))
    pipeline = define_failing_pipeline()
    env_config = {
        'solids': {
            'sum_solid': {
                'inputs': {
                    'num': {
                        'csv': {
                            'path': script_relative_path('num.csv')
                        }
                    }
                }
            }
        }
    }
    selector = ExecutionSelector('pandas_hello_world')
    pipeline_run = InMemoryPipelineRun(
        run_id,
        selector,
        env_config,
        create_execution_plan(pipeline, env_config),
        reexecution_config=None,
        step_keys_to_execute=None,
    )
    execution_manager = MultiprocessingExecutionManager()
    execution_manager.execute_pipeline(repository_container,
                                       pipeline,
                                       pipeline_run,
                                       raise_on_error=False)
    execution_manager.join()
    assert pipeline_run.status == PipelineRunStatus.FAILURE
    assert pipeline_run.all_logs()