Example #1
def define_test_type_pipeline():
    return PipelineDefinition(
        name='test_type_pipeline',
        solids=[
            define_solid_for_test_type('int_config', Int),
            define_solid_for_test_type('list_of_int_config', List(Int)),
            define_solid_for_test_type('nullable_list_of_int_config',
                                       Nullable(List(Int))),
            define_solid_for_test_type('list_of_nullable_int_config',
                                       List(Nullable(Int))),
            define_solid_for_test_type('nullable_list_of_nullable_int_config',
                                       Nullable(List(Nullable(Int)))),
            define_solid_for_test_type(
                'simple_dict',
                Dict({
                    'int_field': Field(Int),
                    'string_field': Field(String)
                })),
            define_solid_for_test_type(
                'dict_with_optional_field',
                Dict({
                    'nullable_int_field': Field(Nullable(Int)),
                    'optional_int_field': Field(Int, is_optional=True),
                    'string_list_field': Field(List(String)),
                }),
            ),
            define_solid_for_test_type(
                'nested_dict',
                Dict({'nested': Field(Dict({'int_field': Field(Int)}))})),
        ],
    )
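The helper define_solid_for_test_type is not shown on this page. A minimal sketch of what it presumably does, assuming it only wraps the given config type in a no-op solid (consistent with the SolidDefinition calls in the other examples):

def define_solid_for_test_type(name, config_type):
    # Assumed helper: a no-op solid whose only purpose is to expose
    # the given config type as its config schema.
    return SolidDefinition(
        name=name,
        inputs=[],
        outputs=[],
        config_field=Field(config_type),
        transform_fn=lambda *_args: None,
    )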
Example #2
def test_nullable_list():
    list_of_ints = List(Int)

    assert not eval_config_value_from_dagster_type(list_of_ints, None).success
    assert eval_config_value_from_dagster_type(list_of_ints, []).success
    assert not eval_config_value_from_dagster_type(list_of_ints, [None]).success
    assert eval_config_value_from_dagster_type(list_of_ints, [1]).success

    nullable_list_of_ints = Nullable(List(Int))

    assert eval_config_value_from_dagster_type(nullable_list_of_ints, None).success
    assert eval_config_value_from_dagster_type(nullable_list_of_ints, []).success
    assert not eval_config_value_from_dagster_type(nullable_list_of_ints, [None]).success
    assert eval_config_value_from_dagster_type(nullable_list_of_ints, [1]).success

    list_of_nullable_ints = List(Nullable(Int))

    assert not eval_config_value_from_dagster_type(list_of_nullable_ints, None).success
    assert eval_config_value_from_dagster_type(list_of_nullable_ints, []).success
    assert eval_config_value_from_dagster_type(list_of_nullable_ints, [None]).success
    assert eval_config_value_from_dagster_type(list_of_nullable_ints, [1]).success

    nullable_list_of_nullable_ints = Nullable(List(Nullable(Int)))

    assert eval_config_value_from_dagster_type(nullable_list_of_nullable_ints, None).success
    assert eval_config_value_from_dagster_type(nullable_list_of_nullable_ints, []).success
    assert eval_config_value_from_dagster_type(nullable_list_of_nullable_ints, [None]).success
    assert eval_config_value_from_dagster_type(nullable_list_of_nullable_ints, [1]).success
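Taken together, the four blocks show that the outer Nullable controls whether the list itself may be None, while Nullable on the element type controls whether individual items may be None; an empty list and [1] are accepted in every combination.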
Example #3
def test_file_system_intermediate_store_composite_types_with_custom_serializer_for_inner_type(
):
    run_id = str(uuid.uuid4())

    intermediate_store = FileSystemIntermediateStore(run_id=run_id)
    assert intermediate_store.root == os.path.join(
        seven.get_system_temp_directory(), 'dagster', 'runs', run_id, 'files')

    with yield_empty_pipeline_context(run_id=run_id) as context:
        try:
            intermediate_store.set_object(
                ['foo', 'bar'],
                context,
                resolve_to_runtime_type(List(LowercaseString)).inst(),
                ['list'],
            )
            assert intermediate_store.has_object(context, ['list'])
            assert intermediate_store.get_object(
                context,
                resolve_to_runtime_type(List(Bool)).inst(),
                ['list']) == ['foo', 'bar']

        finally:
            try:
                shutil.rmtree(intermediate_store.root)
            except seven.FileNotFoundError:
                pass
Example #4
def test_config_double_list_double_error():
    nested_lists = Dict(
        fields={'nested_list_one': Field(List(Int)), 'nested_list_two': Field(List(String))}
    )

    error_value = {'nested_list_one': 'kjdfkdj', 'nested_list_two': ['bar', 2]}
    error_result = eval_config_value_from_dagster_type(nested_lists, error_value)
    assert not error_result.success
    assert len(error_result.errors) == 2
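Each invalid field contributes its own entry to error_result.errors: one for the string passed where List(Int) was expected, and one for the integer 2 inside the List(String) field, hence the count of 2.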
Example #5
def test_display_name():

    int_runtime = resolve_to_runtime_type(Int)
    assert int_runtime.display_name == 'Int'
    list_int_runtime = resolve_to_runtime_type(List(Int))
    assert list_int_runtime.display_name == '[Int]'
    list_list_int_runtime = resolve_to_runtime_type(List(List(Int)))
    assert list_list_int_runtime.display_name == '[[Int]]'
    list_nullable_int_runtime = resolve_to_runtime_type(List(Nullable(Int)))
    assert list_nullable_int_runtime.display_name == '[Int?]'
Example #6
def test_inner_types():
    assert resolve_to_runtime_type(Int).inner_types == []

    list_int_runtime = resolve_to_runtime_type(List(Int))
    assert inner_type_key_set(list_int_runtime) == set(['Int'])

    list_list_int_runtime = resolve_to_runtime_type(List(List(Int)))
    assert inner_type_key_set(list_list_int_runtime) == set(
        ['Int', 'List.Int'])

    list_nullable_int_runtime = resolve_to_runtime_type(List(Nullable(Int)))
    assert inner_type_key_set(list_nullable_int_runtime) == set(
        ['Int', 'Nullable.Int'])
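The helper inner_type_key_set is also not shown here. A plausible one-line implementation, assuming each runtime type carries a key such as 'Int' or 'List.Int' (as the expected sets suggest):

def inner_type_key_set(runtime_type):
    # Assumed helper: collect the unique keys of a type's inner types,
    # e.g. {'Int', 'List.Int'} for List(List(Int)).
    return set(inner_type.key for inner_type in runtime_type.inner_types)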
Example #7
def test_config_double_list():
    nested_lists = Dict(
        {'nested_list_one': Field(List(Int)), 'nested_list_two': Field(List(String))}
    )

    value = {'nested_list_one': [1, 2, 3], 'nested_list_two': ['foo', 'bar']}

    result = eval_config_value_from_dagster_type(nested_lists, value)
    assert result.success
    assert result.value == value

    error_value = {'nested_list_one': 'kjdfkdj', 'nested_list_two': ['bar']}

    error_result = eval_config_value_from_dagster_type(nested_lists, error_value)
    assert not error_result.success
Example #8
def test_item_error_list_path():
    called = {}

    @solid(config_field=Field(List(Int)))
    def required_list_int_solid(context):
        assert context.solid_config == [1, 2]
        called['yup'] = True

    pipeline_def = PipelineDefinition(name='list_path',
                                      solids=[required_list_int_solid])

    with pytest.raises(PipelineConfigEvaluationError) as pe_info:
        execute_pipeline(
            pipeline_def,
            environment_dict={
                'solids': {
                    'required_list_int_solid': {
                        'config': [1, 'nope']
                    }
                }
            },
        )

    pe = pe_info.value
    assert len(pe.errors) == 1
    rtm = pe.errors[0]
    assert rtm.reason == DagsterEvaluationErrorReason.RUNTIME_TYPE_MISMATCH

    assert 'Type failure at path "root:solids:required_list_int_solid:config[1]"' in str(pe)
Example #9
def define_more_complicated_nested_config():
    return PipelineDefinition(
        name='more_complicated_nested_config',
        solids=[
            SolidDefinition(
                name='a_solid_with_multilayered_config',
                inputs=[],
                outputs=[],
                transform_fn=lambda *_args: None,
                config_field=Field(
                    Dict(
                        {
                            'field_one': Field(String),
                            'field_two': Field(String, is_optional=True),
                            'field_three': Field(
                                String, is_optional=True, default_value='some_value'
                            ),
                            'nested_field': Field(
                                Dict(
                                    {
                                        'field_four_str': Field(String),
                                        'field_five_int': Field(Int),
                                        'field_six_nullable_int_list': Field(
                                            List(Nullable(Int)), is_optional=True
                                        ),
                                    }
                                )
                            ),
                        }
                    )
                ),
            )
        ],
    )
Example #10
def test_solid_list_config():
    value = [1, 2]
    called = {}

    def _test_config(context, _inputs):
        assert context.solid_config == value
        called['yup'] = True

    pipeline_def = PipelineDefinition(
        name='solid_list_config_pipeline',
        solids=[
            SolidDefinition(
                name='solid_list_config',
                inputs=[],
                outputs=[],
                config_field=Field(List(Int)),
                transform_fn=_test_config,
            )
        ],
    )

    result = execute_pipeline(
        pipeline_def,
        environment_dict={'solids': {
            'solid_list_config': {
                'config': value
            }
        }})

    assert result.success
    assert called['yup']
Example #11
def test_evaluate_list_error_top_level_mismatch():
    string_list = List(String)
    result = eval_config_value_from_dagster_type(string_list, 1)
    assert not result.success
    assert len(result.errors) == 1
    assert result.errors[0].reason == DagsterEvaluationErrorReason.RUNTIME_TYPE_MISMATCH
Example #12
def test_config_list_in_dict():
    nested_list = Dict({'nested_list': Field(List(Int))})

    value = {'nested_list': [1, 2, 3]}
    result = eval_config_value_from_dagster_type(nested_list, value)
    assert result.success
    assert result.value == value
Example #13
    def __init__(self, name, sql_queries, description=None):
        name = check.str_param(name, 'name')
        sql_queries = check.list_param(sql_queries, 'sql queries', of_type=str)
        description = check.opt_str_param(description, 'description', 'BigQuery query')

        def _compute_fn(context, _):
            query_job_config = _preprocess_config(context.solid_config.get('query_job_config', {}))

            # Retrieve results as pandas DataFrames
            results = []
            for sql_query in sql_queries:
                # We need to construct a new QueryJobConfig for each query.
                # See: https://bit.ly/2VjD6sl
                cfg = QueryJobConfig(**query_job_config) if query_job_config else None
                context.log.info(
                    'executing query %s with config: %s'
                    % (sql_query, cfg.to_api_repr() if cfg else '(no config provided)')
                )
                results.append(context.resources.bq.query(sql_query, job_config=cfg).to_dataframe())

            yield Result(results)

        super(BigQuerySolidDefinition, self).__init__(
            name=name,
            description=description,
            inputs=[InputDefinition(_START, Nothing)],
            outputs=[OutputDefinition(List(DataFrame))],
            compute_fn=_compute_fn,
            config_field=define_bigquery_query_config(),
            metadata={'kind': 'sql', 'sql': '\n'.join(sql_queries)},
        )
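A hypothetical instantiation (the solid name and query below are illustrative, not from the source):

bq_solid = BigQuerySolidDefinition(
    name='count_events',  # illustrative name
    sql_queries=['SELECT COUNT(1) FROM dataset.events'],  # illustrative query
)

Since the input is Nothing and the output is List(DataFrame), the solid can be ordered after an upstream solid without receiving data, and it yields the results of all its queries as a single list.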
Example #14
def test_interleaved_values():
    @solid(inputs=[InputDefinition('stuff', List(Any))])
    def collect(_context, stuff):
        assert set(stuff) == set([1, None, 'one'])
        return stuff

    @lambda_solid
    def emit_num():
        return 1

    @lambda_solid
    def emit_none():
        pass

    @lambda_solid
    def emit_str():
        return 'one'

    result = execute_pipeline(
        PipelineDefinition(
            name='input_test',
            solids=[emit_num, emit_none, emit_str, collect],
            dependencies={
                'collect': {
                    'stuff':
                    MultiDependencyDefinition([
                        DependencyDefinition('emit_num'),
                        DependencyDefinition('emit_none'),
                        DependencyDefinition('emit_str'),
                    ])
                }
            },
        ))
    assert result.success
Example #15
def test_two_list_types():
    assert PipelineDefinition(
        name='two_types',
        solids=[
            SolidDefinition(
                name='two_list_type',
                inputs=[],
                outputs=[],
                config_field=Field(
                    Dict({
                        'list_one': Field(List(Int)),
                        'list_two': Field(List(Int))
                    })),
                transform_fn=lambda *_args: None,
            )
        ],
    )
Example #16
    def __init__(self, name, main_class, description=None):
        name = check.str_param(name, 'name')
        main_class = check.str_param(main_class, 'main_class')
        description = check.opt_str_param(
            description,
            'description',
            'This solid is a generic representation of a parameterized Spark job.',
        )

        def _spark_compute_fn(context, _):
            '''Define Spark execution.

            This function defines how we'll execute the Spark job and invokes spark-submit.
            '''

            spark_shell_cmd = create_spark_shell_cmd(context.solid_config,
                                                     main_class)

            context.log.info("Running spark-submit: " +
                             ' '.join(spark_shell_cmd))
            retcode = run_spark_subprocess(spark_shell_cmd, context.log)

            if retcode != 0:
                raise SparkSolidError(
                    'Spark job failed. Please consult your logs.')

            yield Result(context.solid_config.get('spark_outputs'), 'paths')

        super(SparkSolidDefinition, self).__init__(
            name=name,
            description=description,
            inputs=[InputDefinition('spark_inputs', List(Path))],
            outputs=[OutputDefinition(dagster_type=List(Path), name='paths')],
            compute_fn=_spark_compute_fn,
            config_field=define_spark_config(),
            metadata={
                'kind': 'spark',
                'main_class': main_class
            },
            step_metadata_fn=functools.partial(step_metadata_fn,
                                               solid_name=name,
                                               main_class=main_class),
        )
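A hypothetical instantiation (the name and main class below are illustrative):

spark_solid = SparkSolidDefinition(
    name='run_etl_job',             # illustrative name
    main_class='com.example.Main',  # illustrative Spark entry point
)

The heavy lifting of building the spark-submit command line from config happens in create_spark_shell_cmd, defined elsewhere; Example #30 below shows an older variant of this class that inlines that logic.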
Example #17
def test_s3_intermediate_store_composite_types_with_custom_serializer_for_inner_type():
    run_id = str(uuid.uuid4())

    intermediate_store = S3IntermediateStore(run_id=run_id, s3_bucket='dagster-airflow-scratch')
    with yield_empty_pipeline_context(run_id=run_id) as context:
        try:
            intermediate_store.set_object(
                ['foo', 'bar'],
                context,
                resolve_to_runtime_type(List(LowercaseString)).inst(),
                ['list'],
            )
            assert intermediate_store.has_object(context, ['list'])
            assert intermediate_store.get_object(
                context, resolve_to_runtime_type(List(Bool)).inst(), ['list']
            ) == ['foo', 'bar']

        finally:
            # Clean up the object stored under the key ['list'] above.
            intermediate_store.rm_object(context, ['list'])
Example #18
def define_pipeline_with_list():
    return PipelineDefinition(
        name='pipeline_with_list',
        solids=[
            SolidDefinition(
                name='solid_with_list',
                inputs=[],
                outputs=[],
                transform_fn=lambda *_args: None,
                config_field=Field(List(Int)),
            )
        ],
    )
Example #19
    def _inputs_for_source(self, source):
        if source == BigQueryLoadSource.DataFrame:
            return [InputDefinition('df', DataFrame)]
        elif source == BigQueryLoadSource.File:
            return [InputDefinition('file_path', Path)]
        elif source == BigQueryLoadSource.Gcs:
            return [InputDefinition('source_uris', List(Path))]
        else:
            raise BigQueryError(
                'invalid source specification -- must be one of [%s]'
                % ','.join(
                    [BigQueryLoadSource.DataFrame, BigQueryLoadSource.File, BigQueryLoadSource.Gcs]
                )
            )
Example #20
def test_file_system_intermediate_store_composite_types():
    run_id = str(uuid.uuid4())

    intermediate_store = FileSystemIntermediateStore(run_id=run_id)
    assert intermediate_store.root == os.path.join(
        seven.get_system_temp_directory(), 'dagster', 'runs', run_id, 'files')

    with yield_empty_pipeline_context(run_id=run_id) as context:
        try:
            intermediate_store.set_object([True, False], context,
                                          resolve_to_runtime_type(
                                              List(Bool)).inst(), ['bool'])
            assert intermediate_store.has_object(context, ['bool'])
            assert intermediate_store.get_object(
                context,
                resolve_to_runtime_type(List(Bool)).inst(),
                ['bool']) == [True, False]

        finally:
            try:
                shutil.rmtree(intermediate_store.root)
            except seven.FileNotFoundError:
                pass
Example #21
def test_file_system_intermediate_store_with_composite_type_storage_plugin():
    run_id = str(uuid.uuid4())

    intermediate_store = FileSystemIntermediateStore(
        run_id=run_id,
        types_to_register={
            RuntimeString.inst(): FancyStringFilesystemTypeStoragePlugin
        },
    )

    with yield_empty_pipeline_context(run_id=run_id) as context:
        with pytest.raises(check.NotImplementedCheckError):
            intermediate_store.set_value(['hello'], context,
                                         resolve_to_runtime_type(List(String)),
                                         ['obj_name'])

    with yield_empty_pipeline_context(run_id=run_id) as context:
        with pytest.raises(check.NotImplementedCheckError):
            intermediate_store.set_value(['hello'], context,
                                         resolve_to_runtime_type(
                                             Nullable(String)), ['obj_name'])

    with yield_empty_pipeline_context(run_id=run_id) as context:
        with pytest.raises(check.NotImplementedCheckError):
            intermediate_store.set_value(['hello'], context,
                                         resolve_to_runtime_type(
                                             List(Nullable(String))),
                                         ['obj_name'])

    with yield_empty_pipeline_context(run_id=run_id) as context:
        with pytest.raises(check.NotImplementedCheckError):
            intermediate_store.set_value(['hello'], context,
                                         resolve_to_runtime_type(
                                             Nullable(List(String))),
                                         ['obj_name'])
Example #22
def test_single_level_dict_lists_and_nullable():
    output = print_type_to_string(
        Dict({
            'nullable_int_field': Field(Nullable(Int)),
            'optional_int_field': Field(Int, is_optional=True),
            'string_list_field': Field(List(String)),
        }))

    expected = '''{
  nullable_int_field: Int?
  optional_int_field?: Int
  string_list_field: [String]
}'''

    assert output == expected
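Note the two distinct meanings of '?': a suffix on the type (Int?) marks a value that may be None (Nullable), while a suffix on the field name (optional_int_field?:) marks a field that may be omitted from the config entirely (is_optional=True).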
Example #23
def test_s3_intermediate_store_with_composite_type_storage_plugin():
    run_id = str(uuid.uuid4())

    # FIXME need a dedicated test bucket
    intermediate_store = S3IntermediateStore(
        run_id=run_id,
        s3_bucket='dagster-airflow-scratch',
        types_to_register={RuntimeString.inst(): FancyStringS3TypeStoragePlugin},
    )

    with yield_empty_pipeline_context(run_id=run_id) as context:
        with pytest.raises(check.NotImplementedCheckError):
            intermediate_store.set_value(
                ['hello'], context, resolve_to_runtime_type(List(String)), ['obj_name']
            )
Example #24
def test_wrapping_nothing():
    with pytest.raises(DagsterInvalidDefinitionError):

        @lambda_solid(output=OutputDefinition(List(Nothing)))
        def _():
            pass

    with pytest.raises(DagsterInvalidDefinitionError):

        @lambda_solid(inputs=[InputDefinition(List(Nothing))])
        def _():
            pass

    with pytest.raises(DagsterInvalidDefinitionError):

        @lambda_solid(output=OutputDefinition(Nullable(Nothing)))
        def _():
            pass

    with pytest.raises(DagsterInvalidDefinitionError):

        @lambda_solid(inputs=[InputDefinition(Nullable(Nothing))])
        def _():
            pass
Example #25
def test_config_list_in_dict_error():
    nested_list = Dict({'nested_list': Field(List(Int))})

    value = {'nested_list': [1, 'bar', 3]}
    result = eval_config_value_from_dagster_type(nested_list, value)
    assert not result.success
    assert len(result.errors) == 1
    error = result.errors[0]
    assert error.reason == DagsterEvaluationErrorReason.RUNTIME_TYPE_MISMATCH
    assert len(error.stack.entries) == 2
    stack_entry = error.stack.entries[0]
    assert isinstance(stack_entry, EvaluationStackPathEntry)
    assert stack_entry.field_name == 'nested_list'
    list_entry = error.stack.entries[1]
    assert isinstance(list_entry, EvaluationStackListItemEntry)
    assert list_entry.list_index == 1
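The error's stack walks from the dict field ('nested_list') down to the offending list index (1), which matches the path-style reporting seen in Example #8.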
Example #26
    def __init__(self, name, sql_queries, parameters=None, description=None):
        name = check.str_param(name, 'name')
        sql_queries = check.list_param(sql_queries, 'sql queries', of_type=str)

        description = check.opt_str_param(
            description,
            'description',
            'This solid is a generic representation of a parameterized Snowflake query.',
        )

        def _snowflake_compute_fn(context, _):  # pylint: disable=too-many-locals
            '''Define Snowflake execution.

            This function defines how we'll execute the Snowflake SQL query.
            '''
            with context.resources.snowflake.get_connection(
                    context.log) as conn:
                with closing(conn.cursor()) as cursor:
                    results = []
                    for query in sql_queries:
                        if sys.version_info[0] < 3:
                            query = query.encode('utf-8')

                        context.log.info(
                            'Executing SQL query %s %s' %
                            (query, 'with parameters ' +
                             str(parameters) if parameters else ''))
                        cursor.execute(query, parameters)  # pylint: disable=E1101
                        fetchall_results = cursor.fetchall()  # pylint: disable=E1101
                        results.append(pd.DataFrame(fetchall_results))

                    yield Result(results)

        super(SnowflakeSolidDefinition, self).__init__(
            name=name,
            description=description,
            inputs=[InputDefinition('start', Nothing)],
            outputs=[OutputDefinition(List(dagster_pd.DataFrame))],
            compute_fn=_snowflake_compute_fn,
            metadata={
                'kind': 'sql',
                'sql': '\n'.join(sql_queries)
            },
        )
Example #27
def test_working_list_path():
    called = {}

    @solid(config_field=Field(List(Int)))
    def required_list_int_solid(context):
        assert context.solid_config == [1, 2]
        called['yup'] = True

    pipeline_def = PipelineDefinition(name='list_path',
                                      solids=[required_list_int_solid])

    result = execute_pipeline(pipeline_def,
                              environment_dict={
                                  'solids': {
                                      'required_list_int_solid': {
                                          'config': [1, 2]
                                      }
                                  }
                              })

    assert result.success
    assert called['yup']
Example #28
def test_nothing_deps():
    @solid(inputs=[InputDefinition('stuff', List(Any))])
    def collect(_context, stuff):
        return stuff

    @lambda_solid(output=OutputDefinition(Int))
    def emit_num():
        return 1

    @lambda_solid(output=OutputDefinition(Nothing))
    def emit_nothing():
        pass

    @lambda_solid(output=OutputDefinition(String))
    def emit_str():
        return 'one'

    with pytest.raises(
            DagsterInvalidDefinitionError,
            match=r'Input "stuff" expects a value of type \[Any\] and output '
            '"result" returns type Nothing',
    ):
        PipelineDefinition(
            name='input_test',
            solids=[emit_num, emit_nothing, emit_str, collect],
            dependencies={
                'collect': {
                    'stuff':
                    MultiDependencyDefinition([
                        DependencyDefinition('emit_num'),
                        DependencyDefinition('emit_nothing'),
                        DependencyDefinition('emit_str'),
                    ])
                }
            },
        )
Example #29
def test_simple_values():
    @solid(inputs=[InputDefinition('numbers', List(Int))])
    def sum_num(_context, numbers):
        # can't guarantee the order of fan-in values
        assert set(numbers) == set([1, 2, 3])
        return sum(numbers)

    @lambda_solid
    def emit_1():
        return 1

    @lambda_solid
    def emit_2():
        return 2

    @lambda_solid
    def emit_3():
        return 3

    result = execute_pipeline(
        PipelineDefinition(
            name='input_test',
            solids=[emit_1, emit_2, emit_3, sum_num],
            dependencies={
                'sum_num': {
                    'numbers':
                    MultiDependencyDefinition([
                        DependencyDefinition('emit_1'),
                        DependencyDefinition('emit_2'),
                        DependencyDefinition('emit_3'),
                    ])
                }
            },
        ))
    assert result.success
    assert result.result_for_solid('sum_num').transformed_value() == 6
Example #30
    def __init__(self, name, main_class, description=None):
        name = check.str_param(name, 'name')
        main_class = check.str_param(main_class, 'main_class')
        description = check.opt_str_param(
            description,
            'description',
            'This solid is a generic representation of a parameterized Spark job.',
        )

        def _spark_transform_fn(context, _):
            '''Define Spark execution.

            This function defines how we'll execute the Spark job and invokes spark-submit.
            '''

            # Extract parameters from config
            (
                master_url,
                deploy_mode,
                application_jar,
                spark_conf,
                application_arguments,
                spark_home,
                spark_outputs,
            ) = [
                context.solid_config.get(k) for k in (
                    'master_url',
                    'deploy_mode',
                    'application_jar',
                    'spark_conf',
                    'application_arguments',
                    'spark_home',
                    'spark_outputs',
                )
            ]

            # Let the user use env vars in the jar path
            application_jar = os.path.expandvars(application_jar)

            if not os.path.exists(application_jar):
                raise SparkSolidError(
                    ('Application jar {} does not exist. A valid jar must be '
                     'built before running this solid.'.format(application_jar)
                     ))

            spark_home = spark_home if spark_home else os.environ.get(
                'SPARK_HOME')

            if spark_home is None:
                raise SparkSolidError((
                    'No spark home set. You must either pass spark_home in config or '
                    'set $SPARK_HOME in your environment (got None).'))

            deploy_mode = ['--deploy-mode', '{}'.format(deploy_mode)
                           ] if deploy_mode else []

            spark_shell_cmd = (
                [
                    '{}/bin/spark-submit'.format(spark_home),
                    '--class',
                    main_class,
                    '--master',
                    master_url,
                ] + deploy_mode + parse_spark_config(spark_conf) +
                [application_jar] +
                ([application_arguments] if application_arguments else []))

            context.log.info("Running spark-submit: " +
                             ' '.join(spark_shell_cmd))
            retcode = run_spark_subprocess(spark_shell_cmd, context.log)

            if retcode != 0:
                raise SparkSolidError(
                    'Spark job failed. Please consult your logs.')

            yield Result(spark_outputs, 'paths')

        super(SparkSolidDefinition, self).__init__(
            name=name,
            description=description,
            inputs=[InputDefinition('spark_inputs', List(Path))],
            outputs=[OutputDefinition(dagster_type=List(Path), name='paths')],
            transform_fn=_spark_transform_fn,
            config_field=define_spark_config(),
            metadata={
                'kind': 'spark',
                'main_class': main_class
            },
        )