def make_python_callable(cls, dag_id, pipeline, env_config, step_keys):
    try:
        from dagster import RepositoryDefinition
        from dagster.cli.dynamic_loader import RepositoryContainer
        from dagster_graphql.cli import execute_query_from_cli
    except ImportError:
        raise AirflowException(
            'To use the DagsterPythonOperator, dagster and dagster_graphql must be installed '
            'in your Airflow environment.'
        )

    # Wrap the single pipeline in a throwaway repository so it can be addressed
    # through the GraphQL layer.
    repository = RepositoryDefinition('<<ephemeral repository>>', {dag_id: lambda: pipeline})
    repository_container = RepositoryContainer(repository=repository)

    def python_callable(**kwargs):
        run_id = kwargs.get('dag_run').run_id
        query = QUERY_TEMPLATE.format(
            config=env_config,
            run_id=run_id,
            step_keys=json.dumps(step_keys),
            pipeline_name=pipeline.name,
        )
        res = json.loads(execute_query_from_cli(repository_container, query, variables=None))
        cls.handle_errors(res, None)
        return cls.handle_result(res)

    return python_callable
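# Usage sketch (an assumption, not code from the source): the callable returned above
# can be handed to a stock Airflow PythonOperator. The operator class `cls`, the DAG,
# and the pipeline below are hypothetical placeholders; `provide_context=True` is what
# makes Airflow pass the `dag_run` kwarg the callable reads.
from airflow.operators.python_operator import PythonOperator

def build_dagster_task(cls, dag, pipeline, env_config, step_keys):
    return PythonOperator(
        task_id='dagster_{name}'.format(name=pipeline.name),
        python_callable=cls.make_python_callable(dag.dag_id, pipeline, env_config, step_keys),
        provide_context=True,
        dag=dag,
    )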
def define_context(raise_on_error=True):
    return DagsterGraphQLContext(
        RepositoryContainer(repository=define_repository()),
        PipelineRunStorage(),
        execution_manager=SynchronousExecutionManager(),
        raise_on_error=raise_on_error,
    )
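# Usage sketch: define_context packages the boilerplate that tests such as
# test_pipelines_python_error below otherwise build by hand, so a test body
# collapses to a single call. PIPELINES and execute_dagster_graphql are the same
# names used elsewhere in this section.
def test_pipelines_smoke():
    result = execute_dagster_graphql(define_context(), PIPELINES)
    assert result.data['pipelinesOrError']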
def test_smoke_app():
    repository_container = RepositoryContainer(repository=define_repo())
    pipeline_run_storage = PipelineRunStorage()
    flask_app = app.create_app(repository_container, pipeline_run_storage)
    client = flask_app.test_client()

    result = client.post('/graphql', data={'query': 'query { pipelines { nodes { name }}}'})
    data = json.loads(result.data.decode('utf-8'))
    assert len(data['data']['pipelines']['nodes']) == 1
    assert {node_data['name'] for node_data in data['data']['pipelines']['nodes']} == {
        'repo_demo_pipeline'
    }

    result = client.get('/graphql')
    assert result.status_code == 400
    data = json.loads(result.data.decode('utf-8'))
    assert len(data['errors']) == 1
    assert data['errors'][0]['message'] == 'Must provide query string.'

    result = client.get('/dagit/notebook?path=foo.bar')
    assert result.status_code == 400
    assert result.data.decode('utf-8') == 'Invalid Path'

    result = client.post('/graphql', data={'query': 'query { version { slkjd } }'})
    data = json.loads(result.data.decode('utf-8'))
    assert 'errors' in data
    assert len(data['errors']) == 1
    assert 'must not have a sub selection' in data['errors'][0]['message']

    result = client.get('static/foo/bar')
    assert result.status_code == 404
def ui(variables, query, **kwargs):
    repository_target_info = load_target_info_from_cli_args(kwargs)
    repository_container = RepositoryContainer(repository_target_info)
    query = query.strip('\'" \n\t')  # tolerate shell quoting/whitespace around the query string
    execute_query_from_cli(repository_container, query, variables)
def test_pipelines_python_error():
    ctx = DagsterGraphQLContext(
        RepositoryContainer(repository=define_error_pipeline_repo()),
        PipelineRunStorage(),
        execution_manager=SynchronousExecutionManager(),
    )
    result = execute_dagster_graphql(ctx, PIPELINES)
    assert result.data['pipelinesOrError']['__typename'] == 'PythonError'
def ui(host, port, sync, log, log_dir, no_watch=False, **kwargs):
    repository_target_info = load_target_info_from_cli_args(kwargs)
    # add the path for the cwd so imports in dynamically loaded code work correctly
    sys.path.append(os.getcwd())
    repository_container = RepositoryContainer(repository_target_info)
    check.invariant(
        not no_watch,
        'Do not set no_watch when calling the Dagit Python CLI directly -- this flag is a no-op '
        'at this level and should be set only when invoking dagit/bin/dagit.',
    )
    host_dagit_ui(log, log_dir, repository_container, sync, host, port)
def test_pipelines_or_error_invalid():
    repository = RepositoryDefinition(
        name='test', pipeline_dict={'pipeline': define_circular_dependency_pipeline}
    )
    context = DagsterGraphQLContext(
        RepositoryContainer(repository=repository),
        PipelineRunStorage(),
        execution_manager=SynchronousExecutionManager(),
    )
    result = execute_dagster_graphql(
        context, '{ pipelinesOrError { ... on InvalidDefinitionError { message } } }'
    )
    msg = result.data['pipelinesOrError']['message']
    assert 'Circular reference detected in solid csolid' in msg
def test_running():
    run_id = make_new_run_id()
    repository_container = RepositoryContainer(
        RepositoryTargetInfo(
            repository_yaml=None,
            python_file=__file__,
            fn_name='define_passing_pipeline',
            module_name=None,
        )
    )
    pipeline = define_passing_pipeline()
    env_config = {
        'solids': {
            'sum_solid': {'inputs': {'num': {'csv': {'path': script_relative_path('num.csv')}}}}
        }
    }
    selector = ExecutionSelector('pandas_hello_world')
    pipeline_run = InMemoryPipelineRun(
        run_id,
        selector,
        env_config,
        create_execution_plan(pipeline, env_config),
        reexecution_config=None,
        step_keys_to_execute=None,
    )
    execution_manager = MultiprocessingExecutionManager()
    execution_manager.execute_pipeline(
        repository_container, pipeline, pipeline_run, raise_on_error=False
    )
    execution_manager.join()

    assert pipeline_run.status == PipelineRunStatus.SUCCESS
    events = pipeline_run.all_logs()
    assert events

    process_start_events = get_events_of_type(events, DagsterEventType.PIPELINE_PROCESS_START)
    assert len(process_start_events) == 1

    process_started_events = get_events_of_type(events, DagsterEventType.PIPELINE_PROCESS_STARTED)
    assert len(process_started_events) == 1
def execute_pipeline_through_queue(
    repository_info,
    pipeline_name,
    solid_subset,
    environment_dict,
    run_id,
    message_queue,
    reexecution_config,
    step_keys_to_execute,
):
    """Execute pipeline using the message queue as a transport."""
    message_queue.put(ProcessStartedSentinel(os.getpid()))

    run_config = RunConfig(
        run_id,
        event_callback=message_queue.put,  # stream events back to the parent process
        executor_config=InProcessExecutorConfig(raise_on_error=False),
        reexecution_config=reexecution_config,
        step_keys_to_execute=step_keys_to_execute,
    )

    repository_container = RepositoryContainer(repository_info)
    if repository_container.repo_error:
        # Loading the repository failed in this process; report the error and bail out.
        message_queue.put(
            MultiprocessingError(
                serializable_error_info_from_exc_info(repository_container.repo_error)
            )
        )
        return

    try:
        result = execute_pipeline(
            repository_container.repository.get_pipeline(pipeline_name).build_sub_pipeline(
                solid_subset
            ),
            environment_dict,
            run_config=run_config,
        )
        return result
    except:  # pylint: disable=W0702
        error_info = serializable_error_info_from_exc_info(sys.exc_info())
        message_queue.put(MultiprocessingError(error_info))
    finally:
        message_queue.put(MultiprocessingDone())
        message_queue.close()
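# Parent-side sketch of this transport (an assumption, not code from the source):
# run execute_pipeline_through_queue in a child process with a multiprocessing.Queue
# as the message_queue, then drain events until the MultiprocessingDone sentinel
# arrives (the finally block above guarantees it is always sent).
import multiprocessing

def run_pipeline_in_subprocess(repository_info, pipeline_name, environment_dict, run_id):
    queue = multiprocessing.Queue()
    process = multiprocessing.Process(
        target=execute_pipeline_through_queue,
        args=(repository_info, pipeline_name, None, environment_dict, run_id, queue, None, None),
    )
    process.start()
    events = []
    while True:
        message = queue.get()
        if isinstance(message, MultiprocessingDone):
            break
        events.append(message)
    process.join()
    return events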
def test_execution_crash():
    run_id = make_new_run_id()
    repository_container = RepositoryContainer(
        RepositoryTargetInfo(
            repository_yaml=None,
            python_file=__file__,
            fn_name='define_crashy_pipeline',
            module_name=None,
        )
    )
    pipeline = define_crashy_pipeline()
    env_config = {
        'solids': {
            'sum_solid': {'inputs': {'num': {'csv': {'path': script_relative_path('num.csv')}}}}
        }
    }
    selector = ExecutionSelector('pandas_hello_world')
    pipeline_run = InMemoryPipelineRun(
        run_id,
        selector,
        env_config,
        create_execution_plan(pipeline, env_config),
        reexecution_config=None,
        step_keys_to_execute=None,
    )
    execution_manager = MultiprocessingExecutionManager()
    execution_manager.execute_pipeline(
        repository_container, pipeline, pipeline_run, raise_on_error=False
    )
    execution_manager.join()

    assert pipeline_run.status == PipelineRunStatus.FAILURE
    last_log = pipeline_run.all_logs()[-1]
    print(last_log.message)
    assert last_log.message.startswith(
        'Exception: Pipeline execution process for {run_id} unexpectedly exited\n'.format(
            run_id=run_id
        )
    )
def test_failing():
    run_id = make_new_run_id()
    repository_container = RepositoryContainer(
        RepositoryTargetInfo(
            repository_yaml=None,
            python_file=__file__,
            fn_name='define_failing_pipeline',
            module_name=None,
        )
    )
    pipeline = define_failing_pipeline()
    env_config = {
        'solids': {
            'sum_solid': {'inputs': {'num': {'csv': {'path': script_relative_path('num.csv')}}}}
        }
    }
    selector = ExecutionSelector('pandas_hello_world')
    pipeline_run = InMemoryPipelineRun(
        run_id,
        selector,
        env_config,
        create_execution_plan(pipeline, env_config),
        reexecution_config=None,
        step_keys_to_execute=None,
    )
    execution_manager = MultiprocessingExecutionManager()
    execution_manager.execute_pipeline(
        repository_container, pipeline, pipeline_run, raise_on_error=False
    )
    execution_manager.join()

    assert pipeline_run.status == PipelineRunStatus.FAILURE
    assert pipeline_run.all_logs()