Example #1
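 # Builds EngineEventData for an in-process run: the pid (and the step keys,
 # when provided) are attached as EventMetadataEntry.text entries.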
 def in_process(pid, step_keys_to_execute=None):
     check.int_param(pid, 'pid')
     check.opt_list_param(step_keys_to_execute, 'step_keys_to_execute')
     return EngineEventData(
         metadata_entries=[EventMetadataEntry.text(str(pid), 'pid')] +
         ([EventMetadataEntry.text(str(step_keys_to_execute), 'step_keys')]
          if step_keys_to_execute else []))
Example #2
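 # Same pattern for the multiprocess executor: the pid and the optional step
 # keys become EventMetadataEntry.text entries on the EngineEventData.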
 def multiprocess(pid, step_keys_to_execute=None):
     check.int_param(pid, "pid")
     check.opt_list_param(step_keys_to_execute, "step_keys_to_execute")
     return EngineEventData(
         metadata_entries=[EventMetadataEntry.text(str(pid), "pid")] +
         ([EventMetadataEntry.text(str(step_keys_to_execute), "step_keys")]
          if step_keys_to_execute else []))
Example #3
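 # Multiprocess variant that also records the parent pid when present; the
 # step keys are validated here as an optional set rather than a list.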
 def multiprocess(pid, parent_pid=None, step_keys_to_execute=None):
     check.int_param(pid, 'pid')
     check.opt_int_param(parent_pid, 'parent_pid')
     check.opt_set_param(step_keys_to_execute, 'step_keys_to_execute')
     return EngineEventData(
         metadata_entries=[EventMetadataEntry.text(str(pid), 'pid')] +
         ([EventMetadataEntry.text(str(parent_pid), 'parent_pid')]
          if parent_pid else []) +
         ([EventMetadataEntry.text(str(step_keys_to_execute), 'step_keys')]
          if step_keys_to_execute else []))
Example #4
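    # Emits a DagsterEvent marking the end of resource initialization; each
    # resource contributes an EventMetadataEntry.python_artifact entry with its
    # class and initialization time.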
    def resource_init_success(
        pipeline_name, execution_plan, log_manager, resource_instances, resource_init_times
    ):
        from dagster.core.execution.plan.plan import ExecutionPlan

        metadata_entries = []
        for resource_key in resource_instances.keys():
            resource_obj = resource_instances[resource_key]
            resource_time = resource_init_times[resource_key]
            metadata_entries.append(
                EventMetadataEntry.python_artifact(
                    resource_obj.__class__, resource_key, "Initialized in {}".format(resource_time)
                )
            )

        return DagsterEvent.from_resource(
            pipeline_name=check.str_param(pipeline_name, "pipeline_name"),
            execution_plan=check.inst_param(execution_plan, "execution_plan", ExecutionPlan),
            log_manager=check.inst_param(log_manager, "log_manager", DagsterLogManager),
            message="Finished initialization of resources [{}].".format(
                ", ".join(sorted(resource_init_times.keys()))
            ),
            event_specific_data=EngineEventData(
                metadata_entries=metadata_entries,
                marker_end="resources",
            ),
        )
Example #5
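    # Older variant of resource_init_success: same python_artifact metadata
    # entries, but without the pipeline_name parameter.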
    def resource_init_success(execution_plan, log_manager, resource_instances,
                              resource_init_times):
        from dagster.core.execution.plan.plan import ExecutionPlan

        metadata_entries = []
        for resource_key in resource_instances.keys():
            resource_obj = resource_instances[resource_key]
            resource_time = resource_init_times[resource_key]
            metadata_entries.append(
                EventMetadataEntry.python_artifact(
                    resource_obj.__class__, resource_key,
                    'Initialized in {}'.format(resource_time)))

        return DagsterEvent.from_resource(
            execution_plan=check.inst_param(execution_plan, 'execution_plan',
                                            ExecutionPlan),
            log_manager=check.inst_param(log_manager, 'log_manager',
                                         DagsterLogManager),
            message='Finished initialization of resources [{}].'.format(
                ', '.join(resource_init_times.keys())),
            event_specific_data=EngineEventData(
                metadata_entries=metadata_entries,
                marker_end='resources',
            ),
        )
Example #6
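    # Builds the OBJECT_STORE_OPERATION step event: the message depends on the
    # operation type (SET/GET/CP), and the object key is attached as an
    # EventMetadataEntry.path entry.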
    def object_store_operation(step_context, object_store_operation_result):
        object_store_name = (
            '{object_store_name} '.format(
                object_store_name=object_store_operation_result.object_store_name)
            if object_store_operation_result.object_store_name else '')

        serialization_strategy_modifier = (
            ' using {serialization_strategy_name}'.format(
                serialization_strategy_name=object_store_operation_result.serialization_strategy_name)
            if object_store_operation_result.serialization_strategy_name else '')

        value_name = object_store_operation_result.value_name

        if (ObjectStoreOperationType(object_store_operation_result.op) ==
                ObjectStoreOperationType.SET_OBJECT):
            message = (
                'Stored intermediate object for output {value_name} in '
                '{object_store_name}object store{serialization_strategy_modifier}.'
            ).format(
                value_name=value_name,
                object_store_name=object_store_name,
                serialization_strategy_modifier=serialization_strategy_modifier,
            )
        elif (ObjectStoreOperationType(object_store_operation_result.op) ==
              ObjectStoreOperationType.GET_OBJECT):
            message = (
                'Retrieved intermediate object for input {value_name} in '
                '{object_store_name}object store{serialization_strategy_modifier}.'
            ).format(
                value_name=value_name,
                object_store_name=object_store_name,
                serialization_strategy_modifier=serialization_strategy_modifier,
            )
        elif (ObjectStoreOperationType(object_store_operation_result.op) ==
              ObjectStoreOperationType.CP_OBJECT):
            message = (
                'Copied intermediate object for input {value_name} from {key} to {dest_key}'
            ).format(
                value_name=value_name,
                key=object_store_operation_result.key,
                dest_key=object_store_operation_result.dest_key,
            )
        else:
            message = ''

        return DagsterEvent.from_step(
            DagsterEventType.OBJECT_STORE_OPERATION,
            step_context,
            event_specific_data=ObjectStoreOperationResultData(
                op=object_store_operation_result.op,
                value_name=value_name,
                metadata_entries=[
                    EventMetadataEntry.path(object_store_operation_result.key,
                                            label='key')
                ],
            ),
            message=message,
        )
Example #7
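 # Runs a great_expectations callback (ge_callback, presumably captured from
 # the enclosing scope) against the dataframe and wraps its raw result dict in
 # an ExpectationResult via an EventMetadataEntry.json entry.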
 def _do_expectation(_info, df):
     ge_df = ge.from_pandas(df)
     ge_result = ge_callback(ge_df)
     check.invariant('success' in ge_result)
     return ExpectationResult(
         success=ge_result['success'],
         metadata_entries=[
             EventMetadataEntry.json(label='result', data=ge_result)
         ],
     )
Example #8
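    # Simpler resource_init_success variant: one EventMetadataEntry.text entry
    # per resource, labeled with the resource name and its initialization time.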
    def resource_init_success(execution_plan, log_manager,
                              resource_init_times):
        from dagster.core.execution.plan.plan import ExecutionPlan

        metadata_entries = [
            EventMetadataEntry.text('Initialized in {}.'.format(resource_time),
                                    resource_name)
            for resource_name, resource_time in resource_init_times.items()
        ]
        return DagsterEvent.from_resource(
            execution_plan=check.inst_param(execution_plan, 'execution_plan',
                                            ExecutionPlan),
            log_manager=check.inst_param(log_manager, 'log_manager',
                                         DagsterLogManager),
            message='Finished initialization of resources [{}].'.format(
                ', '.join(resource_init_times.keys())),
            event_specific_data=EngineEventData(
                metadata_entries=metadata_entries,
                marker_end='resources',
            ),
        )
Example #9
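 # Loads an expectations config from file_path (presumably captured from the
 # enclosing scope), validates the dataframe with great_expectations, and
 # attaches the full validation result as an EventMetadataEntry.json entry.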
 def _file_passes(_info, df):
     with open(file_path) as ff:
         expt_config = json.load(ff)
         # This is necessary because ge ends up coercing a type change
         # on the same dataframe instance, changing the type. But a
         # ge can't be copied, because it causes an error.
         # The error is
         #  AttributeError: 'PandasDataset' object has no attribute
         #  'discard_subset_failing_expectations'
         # See https://github.com/great-expectations/great_expectations/issues/342
         df_copy = copy.deepcopy(df)
         ge_df = ge.from_pandas(df_copy, expectations_config=expt_config)
         validate_result = ge_df.validate()
         check.invariant('success' in validate_result)
         check.invariant('results' in validate_result)
         return ExpectationResult(
             success=validate_result['success'],
             metadata_entries=[
                 EventMetadataEntry.json(label='result',
                                         data=validate_result)
             ],
         )
Example #10
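 # Records the interrupted step keys as a single EventMetadataEntry.text entry
 # on the EngineEventData.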
 def interrupted(steps_interrupted):
     check.list_param(steps_interrupted, 'steps_interrupted', str)
     return EngineEventData(metadata_entries=[
         EventMetadataEntry.text(str(steps_interrupted),
                                 'steps_interrupted')
     ])
Example #11
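    # Newer variant of object_store_operation: same message logic, but the
    # ObjectStoreOperationResultData now also carries address, version, and
    # mapping_key, and the input is checked to be an ObjectStoreOperation.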
    def object_store_operation(step_context, object_store_operation_result):
        from dagster.core.definitions.events import ObjectStoreOperation

        check.inst_param(
            object_store_operation_result, "object_store_operation_result", ObjectStoreOperation
        )

        object_store_name = (
            "{object_store_name} ".format(
                object_store_name=object_store_operation_result.object_store_name
            )
            if object_store_operation_result.object_store_name
            else ""
        )

        serialization_strategy_modifier = (
            " using {serialization_strategy_name}".format(
                serialization_strategy_name=object_store_operation_result.serialization_strategy_name
            )
            if object_store_operation_result.serialization_strategy_name
            else ""
        )

        value_name = object_store_operation_result.value_name

        if (
            ObjectStoreOperationType(object_store_operation_result.op)
            == ObjectStoreOperationType.SET_OBJECT
        ):
            message = (
                "Stored intermediate object for output {value_name} in "
                "{object_store_name}object store{serialization_strategy_modifier}."
            ).format(
                value_name=value_name,
                object_store_name=object_store_name,
                serialization_strategy_modifier=serialization_strategy_modifier,
            )
        elif (
            ObjectStoreOperationType(object_store_operation_result.op)
            == ObjectStoreOperationType.GET_OBJECT
        ):
            message = (
                "Retrieved intermediate object for input {value_name} in "
                "{object_store_name}object store{serialization_strategy_modifier}."
            ).format(
                value_name=value_name,
                object_store_name=object_store_name,
                serialization_strategy_modifier=serialization_strategy_modifier,
            )
        elif (
            ObjectStoreOperationType(object_store_operation_result.op)
            == ObjectStoreOperationType.CP_OBJECT
        ):
            message = (
                "Copied intermediate object for input {value_name} from {key} to {dest_key}"
            ).format(
                value_name=value_name,
                key=object_store_operation_result.key,
                dest_key=object_store_operation_result.dest_key,
            )
        else:
            message = ""

        return DagsterEvent.from_step(
            DagsterEventType.OBJECT_STORE_OPERATION,
            step_context,
            event_specific_data=ObjectStoreOperationResultData(
                op=object_store_operation_result.op,
                value_name=value_name,
                address=object_store_operation_result.key,
                metadata_entries=[
                    EventMetadataEntry.path(object_store_operation_result.key, label="key")
                ],
                version=object_store_operation_result.version,
                mapping_key=object_store_operation_result.mapping_key,
            ),
            message=message,
        )