Пример #1
0
def build_dauphin_solid_handles(represented_pipeline,
                                current_dep_index,
                                parent=None):
    """Collect a DauphinSolidHandle for every solid invocation, depth first.

    Walks ``current_dep_index.solid_invocations``. When the definition
    snapshot of an invocation is a ``CompositeSolidDefSnap`` the function
    recurses into that definition's dependency index, so the handles of the
    nested solids are listed before the handle of the solid containing them.

    Args:
        represented_pipeline (RepresentedPipeline): Used to look up solid
            definition snapshots and dependency structure indexes.
        current_dep_index: Index whose ``solid_invocations`` are walked at
            this level.
        parent (Optional[DauphinSolidHandle]): Handle of the enclosing solid,
            or None at the top level.

    Returns:
        list: Handles built at this level and at every level below it.
    """
    check.inst_param(represented_pipeline, 'represented_pipeline',
                     RepresentedPipeline)
    check.opt_inst_param(parent, 'parent', DauphinSolidHandle)

    collected = []
    for invocation in current_dep_index.solid_invocations:
        name = invocation.solid_name
        def_name = invocation.solid_def_name

        dauphin_solid = DauphinSolid(represented_pipeline, name,
                                     current_dep_index)
        solid_handle = SolidHandle(name, parent.handleID if parent else None)
        current = DauphinSolidHandle(
            solid=dauphin_solid,
            handle=solid_handle,
            parent=parent or None,
        )

        def_snap = represented_pipeline.get_solid_def_snap(def_name)
        if isinstance(def_snap, CompositeSolidDefSnap):
            # Children first: their handles precede the composite's own.
            nested_index = represented_pipeline.get_dep_structure_index(
                def_name)
            collected.extend(
                build_dauphin_solid_handles(represented_pipeline,
                                            nested_index, current))

        collected.append(current)

    return collected
Пример #2
0
 def _result_for_handle(self, solid: Solid,
                        handle: SolidHandle) -> NodeExecutionResult:
     """Build the in-process result object for one node of this result.

     Events and captured outputs are narrowed to ``handle`` (resolved
     relative to ``self.handle``) before being handed to the result class.

     Args:
         solid: The node whose result is requested.
         handle: Handle of that node, relative to ``self.handle``.

     Returns:
         An ``InProcessSolidResult`` when the node definition is a
         ``SolidDefinition``, otherwise an ``InProcessGraphResult``.

     Raises:
         DagsterInvariantViolationError: If combining ``handle`` with
             ``self.handle`` yields a falsy value.
     """
     node_def = solid.definition
     events_for_handle = _filter_step_events_by_handle(
         self.event_list, self.handle, handle)

     if self._output_capture:
         outputs_for_handle = _filter_outputs_by_handle(
             self._output_capture, self.handle, handle)
     else:
         outputs_for_handle = None

     full_handle = handle.with_ancestor(self.handle)
     if not full_handle:
         raise DagsterInvariantViolationError(
             f"No handle provided for solid {solid.name}")

     # Both result classes take the same trailing keyword arguments.
     shared_kwargs = dict(
         handle=full_handle,
         all_events=events_for_handle,
         output_capture=outputs_for_handle,
     )
     if isinstance(node_def, SolidDefinition):
         return InProcessSolidResult(solid_def=node_def, **shared_kwargs)
     return InProcessGraphResult(graph_def=node_def, **shared_kwargs)
Пример #3
0
    def output_value(self, output_name=DEFAULT_OUTPUT):
        """Return the value of one output of this composite solid.

        The output is resolved through the definition's output mapping to the
        inner solid that produces it, and that inner solid's result is asked
        for the mapped output.

        Args:
            output_name (str): Name of the output; defaults to DEFAULT_OUTPUT.

        Raises:
            DagsterInvariantViolationError: If the definition has no output
                called ``output_name``.
        """
        check.str_param(output_name, "output_name")

        composite_def = self.solid.definition
        if not composite_def.has_output(output_name):
            # Describe what *is* available so the error is actionable.
            if composite_def.output_dict:
                outputs_clause = "found outputs {output_names}".format(
                    output_names=str(list(composite_def.output_dict.keys()))
                )
            else:
                outputs_clause = "no output mappings were defined"

            raise DagsterInvariantViolationError(
                "Output '{output_name}' not defined in composite solid '{solid}': "
                "{outputs_clause}. If you were expecting this output to be present, you may "
                "be missing an output_mapping from an inner solid to its enclosing composite "
                "solid.".format(
                    output_name=output_name,
                    solid=self.solid.name,
                    outputs_clause=outputs_clause,
                )
            )

        mapping = composite_def.get_output_mapping(output_name)
        inner_solid = composite_def.solid_named(mapping.solid_name)
        inner_result = self._result_for_handle(
            inner_solid, SolidHandle(mapping.solid_name, None)
        )
        return inner_result.output_value(mapping.output_name)
Пример #4
0
    def result_for_handle(self, handle):
        '''Look up the execution result of a solid from its handle string.

        Handles can address solids nested inside composite solids, so this
        reaches below the top level of the pipeline.

        Args:
            handle (str): The string handle for the solid.

        Returns:
            :py:class:`SolidExecutionResult`: Built from the step events whose
            solid handle is, or descends from, ``handle``.

        Raises:
            DagsterInvariantViolationError: If the pipeline yields no solid
                for ``handle``.
        '''
        check.str_param(handle, 'handle')

        solid_def = self.pipeline.get_solid(SolidHandle.from_string(handle))
        if not solid_def:
            raise DagsterInvariantViolationError(
                'Can not find solid handle {handle} in pipeline.'.format(
                    handle=handle))

        events_by_kind = defaultdict(list)
        for step_event in self.event_list:
            if not step_event.is_step_event:
                continue
            if step_event.solid_handle.is_or_descends_from(handle):
                events_by_kind[step_event.step_kind].append(step_event)

        return SolidExecutionResult(solid_def, events_by_kind,
                                    self.reconstruct_context)
Пример #5
0
def test_multiline_logging_complex():
    '''Check the multi-line text logged for a STEP_FAILURE event.

    The event carries a serialized error; the logged record is split on
    newlines and compared line by line with the expected layout.
    '''
    msg = 'DagsterEventType.STEP_FAILURE for step start.materialization.output.result.0'

    error_info = SerializableErrorInfo(
        message="FileNotFoundError: [Errno 2] No such file or directory: '/path/to/file'\n",
        stack=['a stack message'],
        cls_name='FileNotFoundError',
    )
    failure_event = DagsterEvent(
        event_type_value='STEP_FAILURE',
        pipeline_name='error_monster',
        step_key='start.materialization.output.result.0',
        solid_handle=SolidHandle('start', None),
        step_kind_value='MATERIALIZATION_THUNK',
        logging_tags={
            'pipeline': 'error_monster',
            'step_key': 'start.materialization.output.result.0',
            'solid': 'start',
            'solid_definition': 'emit_num',
        },
        event_specific_data=StepFailureData(
            error=error_info,
            user_failure_data=None,
        ),
    )

    # The plain kwargs deliberately disagree with the event ('example' vs
    # 'error_monster'); the expected output below shows the event's values.
    kwargs = {
        'pipeline': 'example',
        'pipeline_name': 'example',
        'step_key': 'start.materialization.output.result.0',
        'solid': 'start',
        'solid_definition': 'emit_num',
        'dagster_event': failure_event,
    }

    with _setup_logger(DAGSTER_DEFAULT_LOGGER) as (captured_results, logger):
        log_manager = DagsterLogManager('123', {}, [logger])
        log_manager.info(msg, **kwargs)

    expected_results = [
        'error_monster - 123 - STEP_FAILURE - DagsterEventType.STEP_FAILURE for step '
        'start.materialization.output.result.0',
        '            cls_name = "FileNotFoundError"',
        '       error_message = "FileNotFoundError: [Errno 2] No such file or directory:'
        ' \'/path/to/file\'\\n"',
        '               solid = "start"',
        '    solid_definition = "emit_num"',
        '            step_key = "start.materialization.output.result.0"',
        '',
        "FileNotFoundError: [Errno 2] No such file or directory: '/path/to/file'",
        '',
        'a stack message',
    ]

    logged_lines = captured_results[0].split('\n')
    assert logged_lines == expected_results
Пример #6
0
def test_multiline_logging_complex():
    """Check the multi-line text logged for a STEP_FAILURE event.

    The event carries a serialized error; the logged record is split on
    newlines and compared line by line with the expected layout.
    """
    msg = "DagsterEventType.STEP_FAILURE for step start.materialization.output.result.0"

    error_info = SerializableErrorInfo(
        message="FileNotFoundError: [Errno 2] No such file or directory: '/path/to/file'\n",
        stack=["a stack message"],
        cls_name="FileNotFoundError",
    )
    failure_event = DagsterEvent(
        event_type_value="STEP_FAILURE",
        pipeline_name="error_monster",
        step_key="start.materialization.output.result.0",
        solid_handle=SolidHandle("start", None),
        step_kind_value="MATERIALIZATION_THUNK",
        logging_tags={
            "pipeline": "error_monster",
            "step_key": "start.materialization.output.result.0",
            "solid": "start",
            "solid_definition": "emit_num",
        },
        event_specific_data=StepFailureData(
            error=error_info,
            user_failure_data=None,
        ),
    )

    # The plain kwargs deliberately disagree with the event ("example" vs
    # "error_monster"); the expected output below shows the event's values.
    kwargs = {
        "pipeline": "example",
        "pipeline_name": "example",
        "step_key": "start.materialization.output.result.0",
        "solid": "start",
        "solid_definition": "emit_num",
        "dagster_event": failure_event,
    }

    with _setup_logger(DAGSTER_DEFAULT_LOGGER) as (captured_results, logger):
        log_manager = DagsterLogManager("123", {}, [logger])
        log_manager.info(msg, **kwargs)

    expected_results = [
        "error_monster - 123 - STEP_FAILURE - DagsterEventType.STEP_FAILURE for step "
        "start.materialization.output.result.0",
        '            cls_name = "FileNotFoundError"',
        '       error_message = "FileNotFoundError: [Errno 2] No such file or directory:'
        " '/path/to/file'\\n\"",
        '               solid = "start"',
        '    solid_definition = "emit_num"',
        '            step_key = "start.materialization.output.result.0"',
        "",
        "FileNotFoundError: [Errno 2] No such file or directory: '/path/to/file'",
        "",
        "a stack message",
    ]

    logged_lines = captured_results[0].split("\n")
    assert logged_lines == expected_results
Пример #7
0
 def result_for_node(self, name: str) -> NodeExecutionResult:
     """Return the result of a top-level node of the graph, looked up by name.

     Args:
         name: Name of a node directly contained in the graph.

     Raises:
         DagsterInvariantViolationError: If the graph has no node named
             ``name`` at its top level.
     """
     if self._graph_def.has_solid_named(name):
         node_handle = SolidHandle(name, None)
         node = self._graph_def.get_solid(node_handle)
         return self._result_for_handle(node, node_handle)

     raise DagsterInvariantViolationError(
         "Tried to get result for node '{name}' in '{container}'. No such top level "
         "node.".format(name=name, container=self._graph_def.name))
Пример #8
0
def dagster_event_from_dict(event_dict, pipeline_name):
    '''Rebuild a DagsterEvent from an event dictionary.

    The ``__typename`` entry selects the event type; the event-specific
    payload is then assembled from the remaining entries of ``event_dict``.

    Args:
        event_dict (dict): String-keyed event data; must contain
            ``__typename`` and ``step``.
        pipeline_name (str): Pipeline name stored on the resulting event.

    Returns:
        DagsterEvent: The reconstructed event (``logging_tags`` is None).

    Raises:
        Exception: If ``__typename`` is not among the handled event types.
    '''
    check.dict_param(event_dict, 'event_dict', key_type=str)
    check.str_param(pipeline_name, 'pipeline_name')

    materialization = event_dict.get('intermediateMaterialization') or {}

    def _materialization():
        # Same construction is needed for output and materialization events.
        return Materialization(
            path=materialization.get('path'),
            description=materialization.get('description'),
        )

    # Get event_type
    event_type = _handled_events().get(event_dict['__typename'])
    if not event_type:
        raise Exception('unhandled event type %s' % event_dict['__typename'])

    # Get event_specific_data
    if event_type == DagsterEventType.STEP_OUTPUT:
        output_handle = StepOutputHandle(event_dict['step']['key'],
                                         event_dict['outputName'])
        event_specific_data = StepOutputData(
            step_output_handle=output_handle,
            value_repr=event_dict['valueRepr'],
            intermediate_materialization=_materialization(),
        )
    elif event_type == DagsterEventType.STEP_SUCCESS:
        event_specific_data = StepSuccessData(0.0)
    elif event_type == DagsterEventType.STEP_MATERIALIZATION:
        event_specific_data = StepMaterializationData(
            materialization=_materialization())
    elif event_type == DagsterEventType.STEP_EXPECTATION_RESULT:
        expectation = event_dict['expectationResult']
        result_metadata = expectation['resultMetadataJsonString']
        expectation_result = ExpectationResult(
            expectation['success'],
            expectation['name'],
            expectation['message'],
            json.loads(result_metadata) if result_metadata else None,
        )
        event_specific_data = StepExpectationResultData(expectation_result)
    elif event_type == DagsterEventType.STEP_FAILURE:
        error_info = SerializableErrorInfo(event_dict['error']['message'],
                                           stack=None,
                                           cls_name=None)
        event_specific_data = StepFailureData(error_info)
    else:
        event_specific_data = None

    event_type_value = event_type.value
    step = event_dict['step']
    return DagsterEvent(
        event_type_value=event_type_value,
        pipeline_name=pipeline_name,
        step_key=step['key'],
        solid_handle=SolidHandle(step['solidHandleID'], None, None),
        step_kind_value=step['kind'],
        logging_tags=None,
        event_specific_data=event_specific_data,
    )
Пример #9
0
def _filter_step_events_by_handle(
        event_list: List[DagsterEvent], ancestor_handle: SolidHandle,
        current_handle: SolidHandle) -> List[DagsterEvent]:
    """Keep the step events that belong to a handle or to its descendants.

    ``current_handle`` is first combined with ``ancestor_handle``; an event
    is kept when it is a step event and its solid handle is, or descends
    from, that combined handle. The input order is preserved.
    """
    target = cast(SolidHandle, current_handle.with_ancestor(ancestor_handle))
    return [
        event for event in event_list
        if event.is_step_event
        and cast(SolidHandle, event.solid_handle).is_or_descends_from(target)
    ]
Пример #10
0
    def output_for_solid(self, handle_str, output_name=DEFAULT_OUTPUT):
        """Fetch one output value of a solid addressed by its handle string.

        Args:
            handle_str (str): The string handle for the solid.
            output_name (str): Optional. Name of the output; defaults to
                DEFAULT_OUTPUT.

        Returns:
            The value stored for ``output_name`` on the result of the solid.
        """
        check.str_param(handle_str, "handle_str")
        check.str_param(output_name, "output_name")

        solid_handle = SolidHandle.from_string(handle_str)
        solid_result = self.result_for_handle(solid_handle)
        return solid_result.output_value(output_name)
Пример #11
0
def _filter_outputs_by_handle(
    output_dict: Dict[StepOutputHandle, Any],
    ancestor_handle: SolidHandle,
    current_handle: SolidHandle,
):
    """Select the captured outputs produced under a given handle.

    ``current_handle`` is combined with ``ancestor_handle``. An entry of
    ``output_dict`` is kept when the solid handle derived from its key is,
    or descends from, the combined handle. Entries are skipped when either
    handle is falsy.

    Returns:
        dict: The matching subset of ``output_dict``.
    """
    target = current_handle.with_ancestor(ancestor_handle)
    matching = {}
    for step_output_handle, value in output_dict.items():
        producer = _get_solid_handle_from_output(step_output_handle)
        if not (producer and target):
            continue
        if producer.is_or_descends_from(target):
            matching[step_output_handle] = value
    return matching
Пример #12
0
    def output_values(self):
        """Map each output name of the composite solid to its computed value.

        Every output is followed through its output mapping to the inner
        solid that produces it. Outputs whose inner solid has no values, or
        lacks the mapped output, are left out of the result.
        """
        composite_def = self.solid.definition
        collected = {}

        for output_name in composite_def.output_dict:
            mapping = composite_def.get_output_mapping(output_name)
            inner_result = self._result_for_handle(
                composite_def.solid_named(mapping.solid_name),
                SolidHandle(mapping.solid_name, None),
            )
            inner_values = inner_result.output_values

            # may be None if inner solid was skipped
            if inner_values is None:
                continue
            if mapping.output_name in inner_values:
                collected[output_name] = inner_values[mapping.output_name]

        return collected
Пример #13
0
def build_dauphin_solid_handles(container, parent=None):
    """Collect a DauphinSolidHandle for every solid in a container, depth first.

    When a solid's definition is itself an ``IContainSolids`` the function
    recurses into it, so handles of nested solids are listed before the
    handle of the solid that contains them.

    Args:
        container (IContainSolids): Object whose ``solids`` are walked.
        parent: Handle of the enclosing solid, or None at the top level.

    Returns:
        list: Handles built at this level and at every level below it.
    """
    check.inst_param(container, 'container', IContainSolids)

    collected = []
    for solid in container.solids:
        dauphin_solid = build_dauphin_solid(solid,
                                            container.dependency_structure)
        solid_handle = SolidHandle(solid.name, solid.definition.name,
                                   parent.handleID if parent else None)
        current = DauphinSolidHandle(
            solid=dauphin_solid,
            handle=solid_handle,
            parent=parent or None,
        )

        if isinstance(solid.definition, IContainSolids):
            # Children first: their handles precede the container's own.
            collected.extend(
                build_dauphin_solid_handles(solid.definition, current))

        collected.append(current)

    return collected
Пример #14
0
    def result_for_solid(self, name):
        '''Return the result of a solid at the top level of the container.

        Args:
            name (str): The name of the top-level solid or aliased solid for which to retrieve
                the result.

        Returns:
            Union[CompositeSolidExecutionResult, SolidExecutionResult]: The result of the solid
            execution within the pipeline.

        Raises:
            DagsterInvariantViolationError: If the container has no solid named ``name``.
        '''
        if self.container.has_solid_named(name):
            return self.result_for_handle(SolidHandle(name, None))

        raise DagsterInvariantViolationError(
            'Tried to get result for solid \'{name}\' in \'{container}\'. No such top level '
            'solid.'.format(name=name, container=self.container.name))
Пример #15
0
    def output_values(self) -> Dict[str, Any]:
        """Map each output name of the graph to its computed value.

        Every output is followed through ``maps_from`` of its output mapping
        to the inner node that produces it. Outputs whose inner node has no
        values, or lacks the mapped output, are left out of the result.
        """
        collected = {}

        for output_name in self._graph_def.output_dict:
            mapping = self._graph_def.get_output_mapping(output_name)
            inner_name = mapping.maps_from.solid_name
            inner_result = self._result_for_handle(
                self._graph_def.solid_named(inner_name),
                SolidHandle(mapping.maps_from.solid_name, None),
            )
            inner_values = inner_result.output_values

            # may be None if inner solid was skipped
            if inner_values is None:
                continue
            inner_output = mapping.maps_from.output_name
            if inner_output in inner_values:
                collected[output_name] = inner_values[
                    mapping.maps_from.output_name]

        return collected
Пример #16
0
    def result_for_handle(self, handle):
        '''Look up the result of a solid nested inside this composite solid.

        The handle is resolved against this solid's definition, then joined
        to this result's own handle with a ``.`` before the result is built.

        Args:
            handle (str): The string handle for the solid.

        Returns:
            [CompositeSolidExecutionResult, SolidExecutionResult]: The result of the given solid.
        '''
        check.str_param(handle, 'handle')

        parsed_handle = SolidHandle.from_string(handle)
        inner_solid = self.solid.definition.get_solid(parsed_handle)
        qualified_handle = '.'.join([self.handle, handle])

        return self._result_for_handle(inner_solid, qualified_handle)
Пример #17
0
    def result_for_handle(self, handle):
        '''Get a :py:class:`SolidExecutionResult` for a given solid handle string.

        Args:
            handle (str): The string handle for the solid.

        Returns:
            :py:class:`SolidExecutionResult`: Built from the step events whose solid handle
            is, or descends from, ``handle``, grouped by step kind.

        Raises:
            DagsterInvariantViolationError: If the pipeline yields no solid for ``handle``.
        '''
        check.str_param(handle, 'handle')

        solid_def = self.pipeline.get_solid(SolidHandle.from_string(handle))
        if not solid_def:
            # Message previously read "Cant not find ..."; corrected typo.
            raise DagsterInvariantViolationError(
                'Can not find solid handle {handle} in pipeline.'.format(
                    handle=handle))

        events_by_kind = defaultdict(list)
        for event in self.event_list:
            # Only step events carry a solid handle to match against.
            if event.is_step_event and event.solid_handle.is_or_descends_from(
                    handle):
                events_by_kind[event.step_kind].append(event)

        return SolidExecutionResult(solid_def, events_by_kind,
                                    self.reconstruct_context)
Пример #18
0
def define_solid_dictionary_cls(
    name, solids, dependency_structure, pipeline_name, parent_handle=None
):
    '''Build the SystemNamedDict holding one field per configurable solid.

    Solids whose definition reports no config entry get no field.

    Args:
        name (str): Name given to the resulting SystemNamedDict.
        solids (List[Solid]): Solids to consider.
        dependency_structure (DependencyStructure): Passed through to each field definition.
        pipeline_name (str): Passed through to each field definition.
        parent_handle (Optional[SolidHandle]): Parent used for each solid's handle.
    '''
    check.str_param(name, 'name')
    check.list_param(solids, 'solids', of_type=Solid)
    check.inst_param(dependency_structure, 'dependency_structure', DependencyStructure)
    check.str_param(pipeline_name, 'pipeline_name')
    check.opt_inst_param(parent_handle, 'parent_handle', SolidHandle)

    configurable_solids = (s for s in solids if s.definition.has_config_entry)
    fields = {
        solid.name: define_isolid_field(
            solid,
            SolidHandle(solid.name, solid.definition.name, parent_handle),
            dependency_structure,
            pipeline_name,
        )
        for solid in configurable_solids
    }

    return SystemNamedDict(name, fields)
Пример #19
0
    def result_for_handle(self, handle):
        """Look up the result of a solid from its handle.

        Handles can address solids nested inside composite solids, so this
        reaches below the top level of the container.

        Args:
            handle (Union[str,SolidHandle]): The handle for the solid; a
                string is parsed into a SolidHandle first.

        Returns:
            Union[CompositeSolidExecutionResult, SolidExecutionResult]: The result of the given
            solid.
        """
        if not isinstance(handle, six.string_types):
            check.inst_param(handle, "handle", SolidHandle)
        else:
            handle = SolidHandle.from_string(handle)

        target_solid = self.container.get_solid(handle)
        return self._result_for_handle(target_solid, handle)
Пример #20
0
def dagster_event_from_dict(event_dict, pipeline_name):
    '''Rebuild a DagsterEvent from an event dictionary.

    The ``__typename`` entry selects the event type; the event-specific
    payload is then assembled from the remaining entries. When a non-empty
    ``step`` entry is present, its key, kind and dotted ``solidHandleID``
    are turned into the step fields and a nested SolidHandle.

    Args:
        event_dict (dict): String-keyed event data; must contain ``__typename``.
        pipeline_name (str): Pipeline name stored on the resulting event.

    Returns:
        DagsterEvent: The reconstructed event (``logging_tags`` is None).

    Raises:
        Exception: If ``__typename`` is not among the handled event types.
    '''
    check.dict_param(event_dict, 'event_dict', key_type=str)
    check.str_param(pipeline_name, 'pipeline_name')

    def _metadata_entries():
        # Falls back to an empty list when the helper returns something falsy.
        return list(
            event_metadata_entries(event_dict.get('metadataEntries')) or [])

    def _type_check_data():
        # Input and output events carry the same type-check payload.
        type_check = event_dict['typeCheck']
        return TypeCheckData(
            success=type_check['success'],
            label=type_check['label'],
            description=event_dict.get('description'),
            metadata_entries=_metadata_entries(),
        )

    # Get event_type
    event_type = HANDLED_EVENTS.get(event_dict['__typename'])
    if not event_type:
        raise Exception('unhandled event type %s' % event_dict['__typename'])

    # Get event_specific_data
    if event_type == DagsterEventType.STEP_OUTPUT:
        output_handle = StepOutputHandle(event_dict['step']['key'],
                                         event_dict['outputName'])
        event_specific_data = StepOutputData(
            step_output_handle=output_handle,
            type_check_data=_type_check_data(),
        )
    elif event_type == DagsterEventType.STEP_INPUT:
        event_specific_data = StepInputData(
            input_name=event_dict['inputName'],
            type_check_data=_type_check_data(),
        )
    elif event_type == DagsterEventType.STEP_SUCCESS:
        event_specific_data = StepSuccessData(0.0)
    elif event_type == DagsterEventType.STEP_UP_FOR_RETRY:
        event_specific_data = StepRetryData(
            error=error_from_data(event_dict['retryError']),
            seconds_to_wait=event_dict['secondsToWait'],
        )
    elif event_type == DagsterEventType.STEP_MATERIALIZATION:
        event_specific_data = StepMaterializationData(
            materialization=materialization_from_data(
                event_dict['materialization']))
    elif event_type == DagsterEventType.STEP_EXPECTATION_RESULT:
        event_specific_data = StepExpectationResultData(
            expectation_result_from_data(event_dict['expectationResult']))
    elif event_type == DagsterEventType.STEP_FAILURE:
        error = error_from_data(event_dict['error'])
        failure_metadata = event_dict.get('failureMetadata')
        if failure_metadata:
            user_failure_data = UserFailureData(
                label=failure_metadata['label'],
                description=failure_metadata['description'],
                metadata_entries=_metadata_entries(),
            )
        else:
            user_failure_data = None
        event_specific_data = StepFailureData(error, user_failure_data)
    elif event_type == DagsterEventType.ENGINE_EVENT:
        engine_error = event_dict.get('engineError')
        event_specific_data = EngineEventData(
            # No empty-list fallback here, unlike the other branches.
            metadata_entries=list(
                event_metadata_entries(event_dict.get('metadataEntries'))),
            marker_start=event_dict.get('markerStart'),
            marker_end=event_dict.get('markerEnd'),
            error=error_from_data(engine_error) if engine_error else None,
        )
    else:
        event_specific_data = None

    # We should update the GraphQL response so that clients don't need to do this handle parsing.
    # See: https://github.com/dagster-io/dagster/issues/1559
    handle = None
    step_key = None
    step_kind_value = None
    step = event_dict.get('step')
    if step:
        step_key = step['key']
        step_kind_value = step['kind']
        # Outermost name first: each segment becomes the child of the previous one.
        for segment in step['solidHandleID'].split('.'):
            handle = SolidHandle(segment, parent=handle)

    return DagsterEvent(
        event_type_value=event_type.value,
        pipeline_name=pipeline_name,
        step_key=step_key,
        solid_handle=handle,
        step_kind_value=step_kind_value,
        logging_tags=None,
        event_specific_data=event_specific_data,
    )
Пример #21
0
    def _build_from_sorted_solids(
        self, solids, dependency_structure, parent_handle=None, parent_step_inputs=None
    ):
        '''Add execution plan steps for the given solids, recursing into composites.

        For every solid, in the order given, this adds the steps for its inputs, then
        either its compute step or (for a composite) the steps of its inner solids, and
        finally the steps for its outputs, registering each output handle on the plan.

        Args:
            solids: Solids to plan, iterated in the order supplied.
            dependency_structure: Dependency structure used to resolve input sources.
            parent_handle: Handle of the enclosing composite solid, or None at top level.
            parent_step_inputs: Step inputs of the enclosing composite solid, forwarded
                to input source resolution; None at top level.

        Returns:
            The compute step of the last solid handled (for a composite, whatever the
            recursive call returned), or None when ``solids`` is empty.
        '''
        terminal_transform_step = None
        for solid in solids:
            handle = SolidHandle(solid.name, solid.definition.name, parent_handle)

            ### 1. INPUTS
            # Create and add execution plan steps for solid inputs
            step_inputs = []
            for input_name, input_def in solid.definition.input_dict.items():
                prev_step_output_handle = get_input_source_step_handles(
                    self,
                    solid,
                    input_name,
                    input_def,
                    dependency_structure,
                    handle,
                    parent_step_inputs,
                )

                # We return None for the handle (see above in get_input_source_step_handle) when the
                # input def runtime type is "Nothing"
                if not prev_step_output_handle:
                    continue

                subplan = create_subplan_for_input(
                    self.pipeline_name,
                    self.environment_config,
                    solid,
                    prev_step_output_handle,
                    input_def,
                    handle,
                )

                self.add_steps(subplan.steps)

                # The solid consumes the last output of the input subplan.
                step_inputs.append(
                    StepInput(
                        input_name, input_def.runtime_type, subplan.terminal_step_output_handle
                    )
                )

            ### 2. COMPUTE FUNCTION OR RECURSE
            # Create and add execution plan step for the solid compute function or
            # recurse over the solids in a CompositeSolid
            if isinstance(solid.definition, SolidDefinition):
                solid_transform_step = create_compute_step(
                    self.pipeline_name, self.environment_config, solid, step_inputs, handle
                )
                self.add_step(solid_transform_step)
                terminal_transform_step = solid_transform_step
            elif isinstance(solid.definition, CompositeSolidDefinition):
                # Inner solids see this solid's handle as parent and its step inputs
                # as the inputs available from the enclosing scope.
                terminal_transform_step = self._build_from_sorted_solids(
                    solids_in_topological_order(solid.definition),
                    solid.definition.dependency_structure,
                    parent_handle=handle,
                    parent_step_inputs=step_inputs,
                )
            else:
                check.invariant(
                    False,
                    'Unexpected solid type {type} encountered during execution planning'.format(
                        type=type(solid.definition)
                    ),
                )

            ### 3. OUTPUTS
            # Create and add execution plan steps (and output handles) for solid outputs
            for name, output_def in solid.definition.output_dict.items():
                subplan = create_subplan_for_output(
                    self.pipeline_name,
                    self.environment_config,
                    solid,
                    terminal_transform_step,
                    output_def,
                    handle,
                )
                self.add_steps(subplan.steps)

                # Record which step output backs this solid output.
                output_handle = solid.output_handle(name)
                self.set_output_handle(output_handle, subplan.terminal_step_output_handle)

        return terminal_transform_step
Пример #22
0
    def _build_from_sorted_solids(
        self,
        solids: List[Solid],
        dependency_structure: DependencyStructure,
        parent_handle: Optional[SolidHandle] = None,
        parent_step_inputs: Optional[List[
            Union[StepInput, UnresolvedMappedStepInput,
                  UnresolvedCollectStepInput]]] = None,
    ):
        for solid in solids:
            handle = SolidHandle(solid.name, parent_handle)

            ### 1. INPUTS
            # Create and add execution plan steps for solid inputs
            has_unresolved_input = False
            has_pending_input = False
            step_inputs: List[Union[StepInput, UnresolvedMappedStepInput,
                                    UnresolvedCollectStepInput]] = []
            for input_name, input_def in solid.definition.input_dict.items():
                step_input_source = get_step_input_source(
                    self,
                    solid,
                    input_name,
                    input_def,
                    dependency_structure,
                    handle,
                    parent_step_inputs,
                )

                # If an input with dagster_type "Nothing" doesn't have a value
                # we don't create a StepInput
                if step_input_source is None:
                    continue

                if isinstance(
                        step_input_source,
                    (FromPendingDynamicStepOutput, FromUnresolvedStepOutput),
                ):
                    has_unresolved_input = True
                    step_inputs.append(
                        UnresolvedMappedStepInput(
                            name=input_name,
                            dagster_type_key=input_def.dagster_type.key,
                            source=step_input_source,
                        ))
                elif isinstance(step_input_source, FromDynamicCollect):
                    has_pending_input = True
                    step_inputs.append(
                        UnresolvedCollectStepInput(
                            name=input_name,
                            dagster_type_key=input_def.dagster_type.key,
                            source=step_input_source,
                        ))
                else:
                    check.inst_param(step_input_source, "step_input_source",
                                     StepInputSource)
                    step_inputs.append(
                        StepInput(
                            name=input_name,
                            dagster_type_key=input_def.dagster_type.key,
                            source=step_input_source,
                        ))

            ### 2a. COMPUTE FUNCTION
            # Create and add execution plan step for the solid compute function
            if isinstance(solid.definition, SolidDefinition):
                step_outputs = create_step_outputs(solid, handle,
                                                   self.environment_config)

                if has_pending_input and has_unresolved_input:
                    check.failed(
                        "Can not have pending and unresolved step inputs")

                elif has_unresolved_input:
                    new_step: IExecutionStep = UnresolvedMappedExecutionStep(
                        handle=UnresolvedStepHandle(solid_handle=handle),
                        pipeline_name=self.pipeline_name,
                        step_inputs=cast(
                            List[Union[StepInput, UnresolvedMappedStepInput]],
                            step_inputs),
                        step_outputs=step_outputs,
                        tags=solid.tags,
                    )
                elif has_pending_input:
                    new_step = UnresolvedCollectExecutionStep(
                        handle=StepHandle(solid_handle=handle),
                        pipeline_name=self.pipeline_name,
                        step_inputs=cast(
                            List[Union[StepInput, UnresolvedCollectStepInput]],
                            step_inputs),
                        step_outputs=step_outputs,
                        tags=solid.tags,
                    )
                else:
                    new_step = ExecutionStep(
                        handle=StepHandle(solid_handle=handle),
                        pipeline_name=self.pipeline_name,
                        step_inputs=cast(List[StepInput], step_inputs),
                        step_outputs=step_outputs,
                        tags=solid.tags,
                    )

                self.add_step(new_step)

            ### 2b. RECURSE
            # Recurse over the solids contained in an instance of GraphDefinition
            elif isinstance(solid.definition, GraphDefinition):
                self._build_from_sorted_solids(
                    solid.definition.solids_in_topological_order,
                    solid.definition.dependency_structure,
                    parent_handle=handle,
                    parent_step_inputs=step_inputs,
                )

            else:
                check.invariant(
                    False,
                    "Unexpected solid type {type} encountered during execution planning"
                    .format(type=type(solid.definition)),
                )

            ### 3. OUTPUTS
            # Create output handles for solid outputs
            for name, output_def in solid.definition.output_dict.items():
                output_handle = solid.output_handle(name)

                # Punch through layers of composition scope to map to the output of the
                # actual compute step
                resolved_output_def, resolved_handle = solid.definition.resolve_output_to_origin(
                    output_def.name, handle)
                step = self.get_step_by_solid_handle(resolved_handle)
                if isinstance(step,
                              (ExecutionStep, UnresolvedCollectExecutionStep)):
                    step_output_handle: Union[
                        StepOutputHandle,
                        UnresolvedStepOutputHandle] = StepOutputHandle(
                            step.key, resolved_output_def.name)
                elif isinstance(step, UnresolvedMappedExecutionStep):
                    step_output_handle = UnresolvedStepOutputHandle(
                        step.handle,
                        resolved_output_def.name,
                        step.resolved_by_step_key,
                        step.resolved_by_output_name,
                    )
                else:
                    check.failed(f"Unexpected step type {step}")

                self.set_output_handle(output_handle, step_output_handle)
Пример #23
0
 def get_step_by_solid_handle(self, handle: SolidHandle) -> IExecutionStep:
     """Return the step stored in ``self._steps`` for the given solid handle.

     The lookup key is the handle's string form (``handle.to_string()``).
     ``handle`` must be a ``SolidHandle``; this is enforced up front by
     ``check.inst_param``.
     """
     check.inst_param(handle, "handle", SolidHandle)
     step_key = handle.to_string()
     return self._steps[step_key]
Пример #24
0
    def _build_from_sorted_solids(self,
                                  solids,
                                  dependency_structure,
                                  parent_handle=None,
                                  parent_step_inputs=None):
        for solid in solids:
            handle = SolidHandle(solid.name, parent_handle)

            ### 1. INPUTS
            # Create and add execution plan steps for solid inputs
            has_unresolved_input = False
            step_inputs = []
            for input_name, input_def in solid.definition.input_dict.items():
                step_input_source = get_step_input_source(
                    self,
                    solid,
                    input_name,
                    input_def,
                    dependency_structure,
                    handle,
                    parent_step_inputs,
                )

                # If an input with dagster_type "Nothing" doesn't have a value
                # we don't create a StepInput
                if step_input_source is None:
                    continue

                if isinstance(
                        step_input_source,
                    (FromPendingDynamicStepOutput, FromUnresolvedStepOutput)):
                    has_unresolved_input = True
                    step_inputs.append(
                        UnresolvedStepInput(
                            name=input_name,
                            dagster_type=input_def.dagster_type,
                            source=step_input_source,
                        ))
                else:
                    check.inst_param(step_input_source, "step_input_source",
                                     StepInputSource)
                    step_inputs.append(
                        StepInput(
                            name=input_name,
                            dagster_type=input_def.dagster_type,
                            source=step_input_source,
                        ))

            ### 2a. COMPUTE FUNCTION
            # Create and add execution plan step for the solid compute function
            if isinstance(solid.definition, SolidDefinition):
                if has_unresolved_input:
                    step = create_unresolved_step(
                        solid=solid,
                        solid_handle=handle,
                        step_inputs=step_inputs,
                        environment_config=self.environment_config,
                        pipeline_name=self.pipeline_name,
                    )
                else:
                    step = create_compute_step(
                        solid=solid,
                        solid_handle=handle,
                        step_inputs=step_inputs,
                        environment_config=self.environment_config,
                        pipeline_name=self.pipeline_name,
                    )

                self.add_step(step)

            ### 2b. RECURSE
            # Recurse over the solids contained in an instance of GraphDefinition
            elif isinstance(solid.definition, GraphDefinition):
                self._build_from_sorted_solids(
                    solid.definition.solids_in_topological_order,
                    solid.definition.dependency_structure,
                    parent_handle=handle,
                    parent_step_inputs=step_inputs,
                )

            else:
                check.invariant(
                    False,
                    "Unexpected solid type {type} encountered during execution planning"
                    .format(type=type(solid.definition)),
                )

            ### 3. OUTPUTS
            # Create output handles for solid outputs
            for name, output_def in solid.definition.output_dict.items():
                output_handle = solid.output_handle(name)

                # Punch through layers of composition scope to map to the output of the
                # actual compute step
                resolved_output_def, resolved_handle = solid.definition.resolve_output_to_origin(
                    output_def.name, handle)
                step = self.get_step_by_solid_handle(resolved_handle)
                if isinstance(step, ExecutionStep):
                    step_output_handle = StepOutputHandle(
                        step.key, resolved_output_def.name)
                else:
                    step_output_handle = UnresolvedStepOutputHandle(
                        step.handle,
                        resolved_output_def.name,
                        step.resolved_by_step_key,
                        step.resolved_by_output_name,
                    )

                self.set_output_handle(output_handle, step_output_handle)
Пример #25
0
    def _build_from_sorted_solids(self,
                                  solids,
                                  dependency_structure,
                                  parent_handle=None,
                                  parent_step_inputs=None):
        terminal_compute_step = None
        for solid in solids:
            handle = SolidHandle(solid.name, solid.definition.name,
                                 parent_handle)

            ### 1. INPUTS
            # Create and add execution plan steps for solid inputs
            step_inputs = []
            for input_name, input_def in solid.definition.input_dict.items():
                step_input = get_step_input(
                    self,
                    solid,
                    input_name,
                    input_def,
                    dependency_structure,
                    handle,
                    parent_step_inputs,
                )

                # If an input with runtime_type "Nothing" doesnt have a value
                # we don't create a StepInput
                if step_input is None:
                    continue

                check.inst_param(step_input, 'step_input', StepInput)
                step_inputs.append(step_input)

            ### 2. COMPUTE FUNCTION OR RECURSE
            # Create and add execution plan step for the solid compute function or
            # recurse over the solids in a CompositeSolid
            if isinstance(solid.definition, SolidDefinition):
                solid_compute_step = create_compute_step(
                    self.pipeline_name, self.environment_config, solid,
                    step_inputs, handle)
                self.add_step(solid_compute_step)
                terminal_compute_step = solid_compute_step
            elif isinstance(solid.definition, CompositeSolidDefinition):
                terminal_compute_step = self._build_from_sorted_solids(
                    solids_in_topological_order(solid.definition),
                    solid.definition.dependency_structure,
                    parent_handle=handle,
                    parent_step_inputs=step_inputs,
                )
            else:
                check.invariant(
                    False,
                    'Unexpected solid type {type} encountered during execution planning'
                    .format(type=type(solid.definition)),
                )

            ### 3. OUTPUTS
            # Create and add execution plan steps (and output handles) for solid outputs
            for name, output_def in solid.definition.output_dict.items():
                subplan = create_subplan_for_output(self.pipeline_name, solid,
                                                    terminal_compute_step,
                                                    output_def)
                self.add_steps(subplan.steps)

                output_handle = solid.output_handle(name)
                self.set_output_handle(output_handle,
                                       subplan.terminal_step_output_handle)

        return terminal_compute_step
Пример #26
0
def dagster_event_from_dict(event_dict, pipeline_name):
    """Build a ``DagsterEvent`` from a dict describing a single event.

    The dict's ``"__typename"`` is mapped to an event type via
    ``HANDLED_EVENTS``; for the event types handled below, the matching
    event-specific payload is assembled from the dict. Any other mapped type
    gets ``event_specific_data=None``.

    Args:
        event_dict: Dict with ``str`` keys describing the event.
        pipeline_name: Pipeline name recorded on the returned event.

    Returns:
        The constructed ``DagsterEvent``.

    Raises:
        Exception: If ``"__typename"`` has no (truthy) entry in
            ``HANDLED_EVENTS``.
    """
    check.dict_param(event_dict, "event_dict", key_type=str)
    check.str_param(pipeline_name, "pipeline_name")

    # Get event_type
    typename = event_dict["__typename"]
    event_type = HANDLED_EVENTS.get(typename)
    if not event_type:
        raise Exception("unhandled event type %s" % typename)

    def _metadata_entries():
        # Shared by the branches that fall back to an empty list.
        return list(
            event_metadata_entries(event_dict.get("metadataEntries")) or [])

    def _type_check_data():
        # Step input and step output events carry the same type-check payload.
        type_check = event_dict["typeCheck"]
        return TypeCheckData(
            success=type_check["success"],
            label=type_check["label"],
            description=event_dict.get("description"),
            metadata_entries=_metadata_entries(),
        )

    # Get event_specific_data
    event_specific_data = None
    if event_type == DagsterEventType.STEP_OUTPUT:
        event_specific_data = StepOutputData(
            step_output_handle=StepOutputHandle(event_dict["stepKey"],
                                                event_dict["outputName"]),
            type_check_data=_type_check_data(),
        )

    elif event_type == DagsterEventType.STEP_INPUT:
        event_specific_data = StepInputData(
            input_name=event_dict["inputName"],
            type_check_data=_type_check_data(),
        )

    elif event_type == DagsterEventType.STEP_SUCCESS:
        event_specific_data = StepSuccessData(0.0)

    elif event_type == DagsterEventType.STEP_UP_FOR_RETRY:
        event_specific_data = StepRetryData(
            error=error_from_data(event_dict["retryError"]),
            seconds_to_wait=event_dict["secondsToWait"],
        )

    elif event_type == DagsterEventType.STEP_MATERIALIZATION:
        event_specific_data = StepMaterializationData(
            materialization=materialization_from_data(
                event_dict["materialization"]))

    elif event_type == DagsterEventType.STEP_EXPECTATION_RESULT:
        event_specific_data = StepExpectationResultData(
            expectation_result_from_data(event_dict["expectationResult"]))

    elif event_type == DagsterEventType.STEP_FAILURE:
        step_error = error_from_data(event_dict["error"])
        failure_metadata = event_dict.get("failureMetadata")
        # User failure data is only attached when failure metadata is present.
        if failure_metadata:
            user_failure_data = UserFailureData(
                label=failure_metadata["label"],
                description=failure_metadata["description"],
                metadata_entries=_metadata_entries(),
            )
        else:
            user_failure_data = None
        event_specific_data = StepFailureData(step_error, user_failure_data)

    elif event_type == DagsterEventType.ENGINE_EVENT:
        # NOTE(review): unlike the branches above, this one has no `or []`
        # fallback around event_metadata_entries(...) -- confirm that helper
        # never returns None before relying on it here.
        engine_metadata = list(
            event_metadata_entries(event_dict.get("metadataEntries")))
        marker_start = event_dict.get("markerStart")
        marker_end = event_dict.get("markerEnd")
        if event_dict.get("engineError"):
            engine_error = error_from_data(event_dict["engineError"])
        else:
            engine_error = None
        event_specific_data = EngineEventData(
            metadata_entries=engine_metadata,
            marker_start=marker_start,
            marker_end=marker_end,
            error=engine_error,
        )

    step_key = event_dict.get("stepKey")
    if event_dict.get("solidHandleID"):
        solid_handle = SolidHandle.from_string(event_dict["solidHandleID"])
    else:
        solid_handle = None

    return DagsterEvent(
        event_type_value=event_type.value,
        pipeline_name=pipeline_name,
        step_key=step_key,
        solid_handle=solid_handle,
        # at the time of writing this:
        # * 'COMPUTE` is the only step kind
        # * this code should get deleted in the near future as we move away from
        #   dagster-graphql CLI as what we invoke in dask/k8s/etc.
        step_kind_value="COMPUTE" if step_key else None,
        logging_tags=None,
        event_specific_data=event_specific_data,
    )
Пример #27
0
def _get_solid_handle_from_output(
        step_output_handle: StepOutputHandle) -> Optional[SolidHandle]:
    """Parse the ``step_key`` of a step output handle into a ``SolidHandle``.

    Returns whatever ``SolidHandle.from_string`` produces for that key.
    """
    step_key = step_output_handle.step_key
    return SolidHandle.from_string(step_key)
Пример #28
0
    def _build_from_sorted_solids(self,
                                  solids,
                                  dependency_structure,
                                  parent_handle=None,
                                  parent_step_inputs=None):
        for solid in solids:
            handle = SolidHandle(solid.name, solid.definition.name,
                                 parent_handle)

            ### 1. INPUTS
            # Create and add execution plan steps for solid inputs
            step_inputs = []
            for input_name, input_def in solid.definition.input_dict.items():
                step_input = get_step_input(
                    self,
                    solid,
                    input_name,
                    input_def,
                    dependency_structure,
                    handle,
                    parent_step_inputs,
                )

                # If an input with runtime_type "Nothing" doesnt have a value
                # we don't create a StepInput
                if step_input is None:
                    continue

                check.inst_param(step_input, 'step_input', StepInput)
                step_inputs.append(step_input)

            ### 2a. COMPUTE FUNCTION
            # Create and add execution plan step for the solid compute function
            if isinstance(solid.definition, SolidDefinition):
                solid_compute_step = create_compute_step(
                    self.pipeline_name, self.environment_config, solid,
                    step_inputs, handle)
                self.add_step(solid_compute_step)

            ### 2b. RECURSE
            # Recurse over the solids contained in an instance of CompositeSolidDefinition
            elif isinstance(solid.definition, CompositeSolidDefinition):
                self._build_from_sorted_solids(
                    solids_in_topological_order(solid.definition),
                    solid.definition.dependency_structure,
                    parent_handle=handle,
                    parent_step_inputs=step_inputs,
                )

            else:
                check.invariant(
                    False,
                    'Unexpected solid type {type} encountered during execution planning'
                    .format(type=type(solid.definition)),
                )

            ### 3. OUTPUTS
            # Create output handles for solid outputs
            for name, output_def in solid.definition.output_dict.items():
                output_handle = solid.output_handle(name)

                # Punch through layers of composition scope to map to the output of the
                # actual compute step
                resolved_output_def, resolved_handle = solid.definition.resolve_output_to_origin(
                    output_def.name, handle)
                compute_step = self.get_step_by_handle(resolved_handle)
                self.set_output_handle(
                    output_handle,
                    StepOutputHandle.from_step(compute_step,
                                               resolved_output_def.name),
                )
Пример #29
0
def test_multiline_logging_complex():
    """Log a message carrying a full ``DagsterEvent`` and check the rendered output.

    The captured log record is split into lines; every line after the first
    is treated as one ``key = value`` pair. The test asserts the plain pairs
    verbatim, the generated id/timestamp pairs by regex, and the
    ``dagster_event`` pair by decoding its JSON payload.
    """
    msg = 'DagsterEventType.STEP_FAILURE for step start.materialization.output.result.0'

    # The error fixtures feed both the logged event and the expected JSON, so
    # they are defined once here.
    error_message = "FileNotFoundError: [Errno 2] No such file or directory: '/path/to/file'\n"
    error_stack = [
        '  File "/Users/nate/src/dagster/python_modules/dagster/dagster/core/errors.py", line 186, in user_code_error_boundary\n    yield\n',
        '  File "/Users/nate/src/dagster/python_modules/dagster/dagster/core/execution_plan/simple_engine.py", line 365, in _iterate_step_outputs_within_boundary\n    for step_output in gen:\n',
        '  File "/Users/nate/src/dagster/python_modules/dagster/dagster/core/execution_plan/materialization_thunk.py", line 28, in _fn\n    runtime_type.output_schema.materialize_runtime_value(config_spec, runtime_value)\n',
        '  File "/Users/nate/src/dagster/python_modules/dagster/dagster/core/types/config_schema.py", line 93, in materialize_runtime_value\n    return func(config_value, runtime_value)\n',
        '  File "/Users/nate/src/dagster/python_modules/dagster/dagster/core/types/config_schema.py", line 110, in _selector\n    return func(selector_key, selector_value, runtime_value)\n',
        '  File "/Users/nate/src/dagster/python_modules/dagster/dagster/core/types/builtin_config_schemas.py", line 59, in _builtin_output_schema\n    with open(json_file_path, \'w\') as ff:\n',
    ]

    kwargs = {
        'pipeline':
        'example',
        'pipeline_name':
        'example',
        'step_key':
        'start.materialization.output.result.0',
        'solid':
        'start',
        'solid_definition':
        'emit_num',
        'dagster_event':
        DagsterEvent(
            event_type_value='STEP_FAILURE',
            pipeline_name='error_monster',
            step_key='start.materialization.output.result.0',
            solid_handle=SolidHandle('start', 'emit_num', None),
            step_kind_value='MATERIALIZATION_THUNK',
            logging_tags={
                'pipeline': 'error_monster',
                'step_key': 'start.materialization.output.result.0',
                'solid': 'start',
                'solid_definition': 'emit_num',
            },
            event_specific_data=StepFailureData(error=SerializableErrorInfo(
                message=error_message,
                stack=error_stack,
                cls_name='FileNotFoundError',
            )),
        ),
    }

    with _setup_logger(DAGSTER_DEFAULT_LOGGER) as (captured_results, logger):

        dl = DagsterLogManager('123', {}, [logger])
        dl.info(msg, **kwargs)

        # Drop the first line of the record; the rest are "key = value" lines.
        kv_pairs = set(captured_results[0].split('\n')[1:])

    expected_pairs = [
        '        orig_message = "DagsterEventType.STEP_FAILURE for step start.materialization.output.result.0"',
        '              run_id = "123"',
        '            pipeline = "example"',
        '    solid_definition = "emit_num"',
        '       pipeline_name = "example"',
        '               solid = "start"',
        '            step_key = "start.materialization.output.result.0"',
    ]
    for e in expected_pairs:
        assert e in kv_pairs

    assert _regex_match_kv_pair(
        r'      log_message_id = "{0}"'.format(REGEX_UUID), kv_pairs)
    assert _regex_match_kv_pair(
        r'       log_timestamp = "{0}"'.format(REGEX_TS), kv_pairs)

    expected_dagster_event = {
        'event_specific_data': [[
            error_message,
            error_stack,
            'FileNotFoundError',
        ]],
        'event_type_value':
        'STEP_FAILURE',
        'pipeline_name':
        'error_monster',
        'solid_handle': ['start', 'emit_num', None],
        'step_key':
        'start.materialization.output.result.0',
        'step_kind_value':
        'MATERIALIZATION_THUNK',
        'logging_tags': {
            'pipeline': 'error_monster',
            'solid': 'start',
            'solid_definition': 'emit_num',
            'step_key': 'start.materialization.output.result.0',
        },
    }

    event_line = [pair for pair in kv_pairs if 'dagster_event' in pair][0]
    # Cut the payload off at the first "dagster_event = " marker. str.strip()
    # is the wrong tool here: its argument is a set of characters to remove
    # from both ends, not a prefix, so it only works while the payload happens
    # to begin and end with characters outside that set.
    _, marker, event_json = event_line.partition('dagster_event = ')
    assert marker, 'dagster_event line is missing its "key = " prefix'
    dagster_event = json.loads(event_json)
    assert dagster_event == expected_dagster_event
Пример #30
0
def dagster_event_from_dict(event_dict, pipeline_name):
    """Build a ``DagsterEvent`` from a dict describing a single event.

    The dict's ``'__typename'`` is mapped to an event type via
    ``_handled_events()``; for the event types handled below, the matching
    event-specific payload is assembled from the dict. Any other mapped type
    gets ``event_specific_data=None``. Step key, step kind and the solid
    handle id are all read from the nested ``event_dict['step']`` entry.

    Args:
        event_dict: Dict with ``str`` keys describing the event.
        pipeline_name: Pipeline name recorded on the returned event.

    Returns:
        The constructed ``DagsterEvent``.

    Raises:
        Exception: If ``'__typename'`` has no (truthy) entry in
            ``_handled_events()``.
    """
    check.dict_param(event_dict, 'event_dict', key_type=str)
    check.str_param(pipeline_name, 'pipeline_name')

    # Get event_type
    typename = event_dict['__typename']
    event_type = _handled_events().get(typename)
    if not event_type:
        raise Exception('unhandled event type %s' % typename)

    def _metadata_entries():
        # Every branch that needs metadata falls back to an empty list.
        return list(
            event_metadata_entries(event_dict.get('metadataEntries')) or [])

    def _type_check_data():
        # Step input and step output events carry the same type-check payload.
        type_check = event_dict['typeCheck']
        return TypeCheckData(
            success=type_check['success'],
            label=type_check['label'],
            description=event_dict.get('description'),
            metadata_entries=_metadata_entries(),
        )

    # Get event_specific_data
    event_specific_data = None
    if event_type == DagsterEventType.STEP_OUTPUT:
        event_specific_data = StepOutputData(
            step_output_handle=StepOutputHandle(event_dict['step']['key'],
                                                event_dict['outputName']),
            type_check_data=_type_check_data(),
        )

    elif event_type == DagsterEventType.STEP_INPUT:
        event_specific_data = StepInputData(
            input_name=event_dict['inputName'],
            type_check_data=_type_check_data(),
        )

    elif event_type == DagsterEventType.STEP_SUCCESS:
        event_specific_data = StepSuccessData(0.0)

    elif event_type == DagsterEventType.STEP_MATERIALIZATION:
        event_specific_data = StepMaterializationData(
            materialization=materialization_from_data(
                event_dict['materialization']))

    elif event_type == DagsterEventType.STEP_EXPECTATION_RESULT:
        event_specific_data = StepExpectationResultData(
            expectation_result_from_data(event_dict['expectationResult']))

    elif event_type == DagsterEventType.STEP_FAILURE:
        # Only the error message is carried over; stack and class name are
        # explicitly left as None.
        error_info = SerializableErrorInfo(event_dict['error']['message'],
                                           stack=None,
                                           cls_name=None)
        failure_metadata = event_dict.get('failureMetadata')
        # User failure data is only attached when failure metadata is present.
        if failure_metadata:
            user_failure_data = UserFailureData(
                label=failure_metadata['label'],
                description=failure_metadata['description'],
                metadata_entries=_metadata_entries(),
            )
        else:
            user_failure_data = None
        event_specific_data = StepFailureData(error_info, user_failure_data)

    # We should update the GraphQL response so that clients don't need to do this handle parsing.
    # See: https://github.com/dagster-io/dagster/issues/1559
    step_data = event_dict['step']
    # Each '.'-separated segment becomes a handle whose parent is the handle
    # built from the segments before it.
    handle = None
    for segment in step_data['solidHandleID'].split('.'):
        handle = SolidHandle(segment, definition_name=None, parent=handle)

    return DagsterEvent(
        event_type_value=event_type.value,
        pipeline_name=pipeline_name,
        step_key=step_data['key'],
        solid_handle=handle,
        step_kind_value=step_data['kind'],
        logging_tags=None,
        event_specific_data=event_specific_data,
    )