Пример #1
0
def test_external_execution_input_marshal_code_error():
    """Check how an IOError raised while unmarshalling an input is reported.

    The ``num`` input of ``add_one.transform`` is marshalled from the key
    ``'nope'``. With ``throw_on_user_error=True`` the call is expected to
    raise ``IOError``; with ``throw_on_user_error=False`` a single failed
    UNMARSHAL_INPUT step result wrapping that ``IOError`` is expected.
    """
    inty_pipeline = define_inty_pipeline()
    plan = create_execution_plan(inty_pipeline)

    def run_plan(throw_on_user_error):
        # Every invocation builds its own marshalling dict and metadata,
        # exactly as two independent call sites would.
        return execute_externalized_plan(
            inty_pipeline,
            plan,
            ['add_one.transform'],
            inputs_to_marshal={'add_one.transform': {'num': 'nope'}},
            execution_metadata=ExecutionMetadata(),
            throw_on_user_error=throw_on_user_error,
        )

    with pytest.raises(IOError):
        run_plan(True)

    results = run_plan(False)

    assert len(results) == 1
    marshal_result = results[0]
    assert marshal_result.success is False
    assert marshal_result.step.kind == StepKind.UNMARSHAL_INPUT
    user_exception = marshal_result.failure_data.dagster_error.user_exception
    assert isinstance(user_exception, IOError)
Пример #2
0
    def to_metadata(self):
        """Build an ``ExecutionMetadata`` from this object's run id and tags.

        ``self.tags`` is iterated as a collection of mappings, each read via
        its ``'key'`` and ``'value'`` entries. A falsy ``self.tags`` (for
        example ``None`` or an empty list) produces an empty tag dict.

        Returns:
            ExecutionMetadata constructed with ``run_id=self.runId`` and the
            collected ``tags`` dict.
        """
        tags = {}
        # Guard on the instance's tags, not on the freshly created (always
        # empty) local dict -- otherwise the loop below can never run and
        # every tag is silently dropped.
        if self.tags:
            for tag in self.tags:  # pylint: disable=E1133
                tags[tag['key']] = tag['value']

        return ExecutionMetadata(run_id=self.runId, tags=tags)
Пример #3
0
def test_external_execution_output_code_error():
    """Check that a bad output marshalling path raises DagsterMarshalOutputError.

    The ``result`` output of ``add_one.transform`` is given the integer
    ``23434`` as its path; the raised error is expected to carry the output
    name, the step key and a fixed message.
    """
    inty_pipeline = define_inty_pipeline()
    plan = create_execution_plan(inty_pipeline)

    bad_output_spec = {'output': 'result', 'path': 23434}

    with pytest.raises(DagsterMarshalOutputError) as exc_info:
        execute_externalized_plan(
            inty_pipeline,
            plan,
            ['return_one.transform', 'add_one.transform'],
            outputs_to_marshal={'add_one.transform': [bad_output_spec]},
            execution_metadata=ExecutionMetadata(),
        )

    error = exc_info.value
    expected_message = (
        'Error during the marshalling of output result in step add_one.transform'
    )
    assert str(error) == expected_message
    assert error.output_name == 'result'
    assert error.step_key == 'add_one.transform'
Пример #4
0
def test_external_execution_marshal_output_code_error():
    """Check how an IOError raised while marshalling an output is reported.

    The ``result`` output of ``add_one.transform`` is marshalled to a key
    under a directory that does not exist. With ``throw_on_user_error=True``
    an ``IOError`` is expected; with ``False`` three step results are
    expected, of which only the marshal-output step failed.
    """
    inty_pipeline = define_inty_pipeline()
    plan = create_execution_plan(inty_pipeline)

    # guaranteed that folder does not exist
    hardcoded_uuid = '83fb4ace-5cab-459d-99b6-2ca9808c54a1'
    missing_key = '{uuid}/{uuid}'.format(uuid=hardcoded_uuid)

    outputs_to_marshal = {
        'add_one.transform': [
            MarshalledOutput(output_name='result', marshalling_key=missing_key),
        ],
    }

    def run_plan(throw_on_user_error):
        # The same outputs_to_marshal object is passed on every invocation.
        return execute_externalized_plan(
            inty_pipeline,
            plan,
            ['return_one.transform', 'add_one.transform'],
            outputs_to_marshal=outputs_to_marshal,
            execution_metadata=ExecutionMetadata(),
            throw_on_user_error=throw_on_user_error,
        )

    with pytest.raises(IOError) as exc_info:
        run_plan(True)

    assert 'No such file or directory' in str(exc_info.value)

    results = run_plan(False)

    assert len(results) == 3

    # Index the results by step key so each step can be checked by name.
    results_by_key = {}
    for step_result in results:
        results_by_key[step_result.step.key] = step_result

    assert results_by_key['return_one.transform'].success is True
    assert results_by_key['add_one.transform'].success is True
    marshal_step_key = 'add_one.transform.marshal-output.result'
    assert results_by_key[marshal_step_key].success is False
Пример #5
0
def test_external_execution_output_code_error_throw_on_user_error():
    """Check that a user exception propagates when throw_on_user_error is set.

    Executing ``user_throw_exception.transform`` is expected to raise an
    exception whose message is ``'whoops'``.
    """
    inty_pipeline = define_inty_pipeline()
    step_keys = ['user_throw_exception.transform']

    with pytest.raises(Exception) as exc_info:
        execute_marshalling(
            inty_pipeline,
            step_keys,
            execution_metadata=ExecutionMetadata(),
            throw_on_user_error=True,
        )

    raised_message = str(exc_info.value)
    assert raised_message == 'whoops'
Пример #6
0
def test_external_execution_step_for_output_missing():
    """Check that marshalling an output for an unknown step key is rejected.

    ``outputs_to_marshal`` names the step key ``'nope'``; the call is
    expected to raise ``DagsterExecutionStepNotFoundError``.
    """
    inty_pipeline = define_inty_pipeline()
    plan = create_execution_plan(inty_pipeline)

    with pytest.raises(DagsterExecutionStepNotFoundError):
        unknown_step_outputs = {'nope': [MarshalledOutput('nope', 'nope')]}
        execute_externalized_plan(
            inty_pipeline,
            plan,
            ['add_one.transform'],
            outputs_to_marshal=unknown_step_outputs,
            execution_metadata=ExecutionMetadata(),
        )
Пример #7
0
def create_test_pipeline_execution_context(loggers=None,
                                           resources=None,
                                           tags=None):
    """Construct a pipeline execution context for use in tests.

    A random UUID string is used as the run id and an empty pipeline named
    ``'test_legacy_context'`` is used as the pipeline definition.

    Args:
        loggers: Forwarded to ``ExecutionMetadata``.
        resources: Forwarded to ``construct_pipeline_execution_context``.
        tags: Forwarded to ``ExecutionMetadata``.

    Returns:
        Whatever ``construct_pipeline_execution_context`` returns.
    """
    generated_run_id = str(uuid.uuid4())
    empty_pipeline = PipelineDefinition(name='test_legacy_context', solids=[])

    # Built in the same order the arguments were originally evaluated.
    metadata = ExecutionMetadata(generated_run_id, tags=tags, loggers=loggers)
    context = ExecutionContext()
    environment = create_environment_config(empty_pipeline)

    return construct_pipeline_execution_context(
        execution_metadata=metadata,
        pipeline=empty_pipeline,
        execution_context=context,
        resources=resources,
        environment_config=environment,
    )
Пример #8
0
def test_external_execution_unsatisfied_input_error():
    """Check the error raised when a subset execution lacks a required input.

    Executing only ``add_one.transform`` without supplying its ``num`` input
    is expected to raise ``DagsterInvalidSubplanMissingInputError`` carrying
    the pipeline name, step keys, step, input name and a fixed message.
    """
    inty_pipeline = define_inty_pipeline()

    with pytest.raises(DagsterInvalidSubplanMissingInputError) as exc_info:
        execute_marshalling(
            inty_pipeline,
            ['add_one.transform'],
            execution_metadata=ExecutionMetadata(),
        )

    error = exc_info.value
    assert error.pipeline_name == 'basic_external_plan_execution'
    assert error.step_keys == ['add_one.transform']
    assert error.step.key == 'add_one.transform'
    assert error.input_name == 'num'

    expected_message = (
        "You have specified a subset execution on pipeline basic_external_plan_execution with "
        "step_keys ['add_one.transform']. You have failed to provide the required input num for "
        "step add_one.transform."
    )
    assert str(error) == expected_message
Пример #9
0
def test_external_execution_output_code_error_no_throw_on_user_error():
    """Check that a user exception is captured when throw_on_user_error is off.

    Executing ``user_throw_exception.transform`` through
    ``execute_marshalling`` is expected to yield one step event whose failure
    data wraps the user exception ``'whoops'`` in a
    ``DagsterExecutionStepExecutionError``.
    """
    inty_pipeline = define_inty_pipeline()

    events = execute_marshalling(
        inty_pipeline,
        ['user_throw_exception.transform'],
        execution_metadata=ExecutionMetadata(),
        throw_on_user_error=False,
    )

    assert len(events) == 1
    only_event = events[0]
    assert isinstance(
        only_event.failure_data.dagster_error,
        DagsterExecutionStepExecutionError,
    )
    user_message = str(only_event.failure_data.dagster_error.user_exception)
    assert user_message == 'whoops'
Пример #10
0
def test_external_execution_step_for_input_missing():
    """Check that marshalling an input for an unknown step key is rejected.

    ``inputs_to_marshal`` names the step key ``'nope'``; the call is expected
    to raise ``DagsterExecutionStepNotFoundError`` reporting that key.
    """
    inty_pipeline = define_inty_pipeline()
    plan = create_execution_plan(inty_pipeline)

    with pytest.raises(DagsterExecutionStepNotFoundError) as exc_info:
        execute_externalized_plan(
            inty_pipeline,
            plan,
            ['add_one.transform'],
            inputs_to_marshal={'nope': {'nope': 'nope'}},
            execution_metadata=ExecutionMetadata(),
        )

    missing_step_key = exc_info.value.step_key
    assert missing_step_key == 'nope'
Пример #11
0
def test_external_execution_output_missing():
    """Check that marshalling an output the step does not define is rejected.

    ``add_one.transform`` is asked to marshal an output named ``'nope'``; the
    call is expected to raise ``DagsterMarshalOutputNotFoundError``.
    """
    inty_pipeline = define_inty_pipeline()
    plan = create_execution_plan(inty_pipeline)

    unknown_output_spec = {'output': 'nope', 'path': 'nope'}

    with pytest.raises(DagsterMarshalOutputNotFoundError):
        execute_externalized_plan(
            inty_pipeline,
            plan,
            ['add_one.transform'],
            outputs_to_marshal={'add_one.transform': [unknown_output_spec]},
            execution_metadata=ExecutionMetadata(),
        )
Пример #12
0
def test_basic_pipeline_external_plan_execution():
    """Run ``add_one.transform`` with a marshalled input and output.

    The value 5 is serialized to one temp file and used as the ``num`` input;
    the ``result`` output is marshalled to a second temp file and expected to
    deserialize to 6. Two step results are expected: a VALUE_THUNK step
    followed by a successful TRANSFORM step producing 6.
    """
    inty_pipeline = define_inty_pipeline()

    with get_temp_file_names(2) as temp_files:
        input_path, output_path = temp_files  # pylint: disable=W0632

        int_type = resolve_to_runtime_type(Int)
        serialize_to_file(int_type.serialization_strategy, 5, input_path)

        plan = create_execution_plan(inty_pipeline)

        results = execute_externalized_plan(
            inty_pipeline,
            plan,
            ['add_one.transform'],
            inputs_to_marshal={'add_one.transform': {'num': input_path}},
            outputs_to_marshal={
                'add_one.transform': [
                    {'output': 'result', 'path': output_path},
                ],
            },
            execution_metadata=ExecutionMetadata(),
        )

        # Read the marshalled output back while the temp files still exist.
        written_value = deserialize_from_file(
            int_type.serialization_strategy, output_path)
        assert written_value == 6

    assert len(results) == 2

    thunk_result = results[0]
    assert thunk_result.kind == StepKind.VALUE_THUNK

    transform_result = results[1]
    assert transform_result.kind == StepKind.TRANSFORM
    assert transform_result.success
    assert transform_result.success_data.output_name == 'result'
    assert transform_result.success_data.value == 6
Пример #13
0
def test_external_execution_output_code_error_no_throw_on_user_error():
    """Check that a user exception is captured when throw_on_user_error is off.

    Executing ``user_throw_exception.transform`` through
    ``execute_externalized_plan`` is expected to yield one step result whose
    failure data wraps the user exception ``'whoops'`` in a
    ``DagsterExecutionStepExecutionError``.
    """
    inty_pipeline = define_inty_pipeline()
    plan = create_execution_plan(inty_pipeline)

    results = execute_externalized_plan(
        inty_pipeline,
        plan,
        ['user_throw_exception.transform'],
        execution_metadata=ExecutionMetadata(),
        throw_on_user_error=False,
    )

    assert len(results) == 1
    only_result = results[0]
    assert isinstance(
        only_result.failure_data.dagster_error,
        DagsterExecutionStepExecutionError,
    )
    user_message = str(only_result.failure_data.dagster_error.user_exception)
    assert user_message == 'whoops'
Пример #14
0
def test_external_execution_marshal_wrong_input_error():
    """Check that marshalling an input the step does not define is rejected.

    ``add_one.transform`` is given a marshalled input named ``'nope'``; the
    call is expected to raise ``DagsterUnmarshalInputNotFoundError`` carrying
    the input name, the step key and a fixed message.
    """
    inty_pipeline = define_inty_pipeline()
    plan = create_execution_plan(inty_pipeline)

    with pytest.raises(DagsterUnmarshalInputNotFoundError) as exc_info:
        execute_externalized_plan(
            inty_pipeline,
            plan,
            ['add_one.transform'],
            inputs_to_marshal={'add_one.transform': {'nope': 'nope'}},
            execution_metadata=ExecutionMetadata(),
        )

    error = exc_info.value
    expected_message = (
        'Input nope does not exist in execution step add_one.transform'
    )
    assert str(error) == expected_message
    assert error.input_name == 'nope'
    assert error.step_key == 'add_one.transform'
Пример #15
0
def test_external_execution_input_marshal_code_error():
    """Check that a failing input unmarshal raises DagsterUnmarshalInputError.

    The ``num`` input of ``add_one.transform`` is marshalled from ``'nope'``;
    the raised error is expected to carry the input name, the step key and a
    fixed message.
    """
    inty_pipeline = define_inty_pipeline()
    plan = create_execution_plan(inty_pipeline)

    with pytest.raises(DagsterUnmarshalInputError) as exc_info:
        execute_externalized_plan(
            inty_pipeline,
            plan,
            ['add_one.transform'],
            inputs_to_marshal={'add_one.transform': {'num': 'nope'}},
            execution_metadata=ExecutionMetadata(),
        )

    error = exc_info.value
    expected_message = (
        'Error during the marshalling of input num in step add_one.transform'
    )
    assert str(error) == expected_message
    assert error.input_name == 'num'
    assert error.step_key == 'add_one.transform'
Пример #16
0
def aws_lambda_handler(event, _context):
    """Execute a single step inside AWS Lambda, exchanging state through S3.

    The handler unpacks ``event['config']`` into a ``LambdaInvocationPayload``,
    downloads and deserializes the intermediate results, the resources and the
    step body from S3, checks that the step's inputs are covered, executes the
    step, records one entry per emitted step event in the intermediate
    results, and uploads the updated results to ``s3_key_outputs``.

    Args:
        event: Lambda event; ``event['config']`` is unpacked positionally into
            ``LambdaInvocationPayload``.
        _context: Lambda context object (unused).

    Raises:
        Exception: If ``_all_inputs_covered`` reports that the intermediate
            results do not cover the step's inputs.
    """
    logger.setLevel(logging.INFO)

    (
        run_id,
        step_idx,
        key,
        s3_bucket,
        s3_key_inputs,
        s3_key_body,
        s3_key_resources,
        s3_key_outputs,
    ) = LambdaInvocationPayload(*event['config'])

    s3 = boto3.client('s3')

    logger.info(
        'Beginning execution of lambda function for run_id %s step %s (%s)',
        run_id, step_idx, key)
    logger.info('Looking for inputs at %s/%s', s3_bucket, s3_key_inputs)

    # NOTE(review): everything below is deserialized straight from S3; if
    # `deserialize` is pickle-based the bucket contents must be trusted --
    # confirm.
    intermediate_results_object = s3.get_object(Bucket=s3_bucket,
                                                Key=s3_key_inputs)
    intermediate_results = deserialize(
        intermediate_results_object['Body'].read())

    logger.info('Looking for resources at %s/%s', s3_bucket, s3_key_resources)
    resources_object = s3.get_object(Bucket=s3_bucket, Key=s3_key_resources)
    resources = deserialize(resources_object['Body'].read())
    # A placeholder pipeline definition and empty environment config are
    # supplied; only the run id, logger and resources come from the payload.
    execution_context = construct_pipeline_execution_context(
        pipeline=PipelineDefinition(name='dummy', solids=[]),
        execution_metadata=ExecutionMetadata(run_id=run_id),
        execution_context=ExecutionContext(loggers=[logger]),
        resources=resources,
        environment_config={},
    )

    logger.info('Looking for step body at %s/%s', s3_bucket, s3_key_body)
    step_body_object = s3.get_object(Bucket=s3_bucket, Key=s3_key_body)
    step = deserialize(step_body_object['Body'].read())

    logger.info('Checking inputs')
    if not _all_inputs_covered(step, intermediate_results):
        result_keys = set(intermediate_results.keys())
        expected_outputs = [ni.prev_output_handle for ni in step.step_inputs]
        logger.error(
            'Not all inputs covered for %s. Not executing.\nKeys in result: %s'
            '\nOutputs needed for inputs %s',
            key,
            result_keys,
            expected_outputs,
        )
        # Carry the reason in the exception itself so the failure is still
        # diagnosable when only the raised error (not the log) is available.
        raise Exception(
            'Not all inputs covered for {key}. Not executing.'.format(key=key))

    logger.info('Constructing input values')
    input_values = {}
    for step_input in step.step_inputs:
        prev_output_handle = step_input.prev_output_handle
        handle = (prev_output_handle.step.key, prev_output_handle.output_name)
        # FIXME - we need a less hacky strategy for serializing and deserializing input handles and
        # result values -- the subscript below is like accessing .success_data on the namedtuple
        # in the simple engine
        input_value = intermediate_results[handle][1].value
        input_values[step_input.name] = input_value

    # Lazy %-style arguments, consistent with every other log call here.
    logger.info('Executing step %s', key)
    step_events = list(
        iterate_step_events_for_step(step, execution_context, input_values))

    for step_event in step_events:
        check.invariant(isinstance(step_event, ExecutionStepEvent))
        # NOTE(review): this reads success_data unconditionally; if a failed
        # event carries no success_data this line would raise -- confirm how
        # failure events should be keyed.
        output_name = step_event.success_data.output_name
        output_handle = (step.key, output_name)
        intermediate_results[output_handle] = (
            step_event.success,
            step_event.success_data,
            step_event.failure_data,
        )
        logger.info('Processing result: %s', output_name)

    logger.info('Uploading intermediate_results to %s', s3_key_outputs)
    # NOTE(review): ACL='public-read' requests a publicly readable object for
    # the serialized results; left unchanged here -- confirm this is intended.
    s3.put_object(
        ACL='public-read',
        Body=serialize(intermediate_results),
        Bucket=s3_bucket,
        Key=s3_key_outputs,
    )