Пример #1
0
        def _wrapped_fn(context):
            """Scan ``directory_name`` and hand recently modified files to ``fn``.

            Regular files whose modification time is strictly greater than
            ``context.last_completion_time`` (treated as 0 when falsy) are
            collected as ``(filename, mtime)`` tuples and passed to
            ``fn(context, fileinfo)``.

            Yields:
                A ``SkipReason`` when ``directory_name`` is not a directory.
                Otherwise every item of the generator returned by ``fn``, or
                the single ``SkipReason`` / ``RunRequest`` it returned.
                Nothing is yielded when ``fn`` returns ``None``.

            Raises:
                DagsterInvariantViolationError: if ``fn`` returns any other
                    kind of object.
            """
            since = context.last_completion_time or 0

            if not os.path.isdir(directory_name):
                yield SkipReason(
                    f"Could not find directory named {directory_name}.")
                return

            # Lazy pipeline: each entry is joined, checked and stat'ed one at a
            # time, in directory-listing order.
            named_paths = ((entry, os.path.join(directory_name, entry))
                           for entry in os.listdir(directory_name))
            stamped = ((entry, os.stat(path).st_mtime)
                       for entry, path in named_paths
                       if os.path.isfile(path))
            fileinfo_since = [(entry, mtime) for entry, mtime in stamped
                              if mtime > since]

            result = fn(context, fileinfo_since)

            # A None result means the user function produced nothing to emit.
            if result is None:
                return

            if inspect.isgenerator(result):
                for emitted in result:
                    yield emitted
                return

            if isinstance(result, (SkipReason, RunRequest)):
                yield result
                return

            raise DagsterInvariantViolationError(
                f"Error in sensor {sensor_name}: Sensor unexpectedly returned output "
                f"{result} of type {type(result)}.  Should only return SkipReason or "
                "RunRequest objects.")
Пример #2
0
def skip_cursor_sensor(context):
    """Advance the integer cursor by one and skip.

    The cursor starts at 1 when ``context.cursor`` is falsy; otherwise it is
    parsed as an int and incremented. The new value is stored as a string via
    ``context.update_cursor`` and a message-less ``SkipReason`` is returned.
    """
    previous = context.cursor
    next_value = int(previous) + 1 if previous else 1
    context.update_cursor(str(next_value))
    return SkipReason()
Пример #3
0
    def toy_file_sensor(_, modified_fileinfo):
        """Request one run per modified file, or skip when there are none.

        Args:
            _: Sensor evaluation context (unused).
            modified_fileinfo: Iterable of ``(filename, mtime)`` pairs for the
                files to process.

        Yields:
            A single ``SkipReason`` when ``modified_fileinfo`` is falsy;
            otherwise one ``RunRequest`` per pair, keyed by
            ``"<filename>:<mtime>"`` and configuring the ``read_file`` solid
            with ``directory_name`` and the filename.
        """
        if not modified_fileinfo:
            yield SkipReason("No modified files")
            # Stop here so a falsy non-iterable (e.g. None) is never iterated
            # below; this also matches the other sensors' skip-then-return flow.
            return

        for filename, mtime in modified_fileinfo:
            yield RunRequest(
                run_key=f"{filename}:{mtime!s}",
                run_config={
                    "solids": {
                        "read_file": {
                            "config": {
                                "directory": directory_name,
                                "filename": filename,
                            }
                        }
                    }
                },
            )
Пример #4
0
def custom_interval_sensor(_context):
    """Always skip: return a ``SkipReason`` with no message.

    The evaluation context is accepted but not used.
    """
    skip_marker = SkipReason()
    return skip_marker
Пример #5
0
def simple_sensor(context):
    """Request a run only when the last completion time is set and odd.

    Returns a ``RunRequest`` with no run key, empty config and empty tags when
    ``int(context.last_completion_time)`` is odd. Returns a message-less
    ``SkipReason`` when the completion time is falsy or even.
    """
    completed_at = context.last_completion_time
    if completed_at and int(completed_at) % 2:
        return RunRequest(run_key=None, run_config={}, tags={})

    return SkipReason()
Пример #6
0
 def never_no_config_sensor(_):
     """Always skip with the reason ``"never"``; the argument is ignored."""
     skip_marker = SkipReason("never")
     return skip_marker
Пример #7
0
        def _wrapped_fn(context: SensorEvaluationContext):
            """Evaluate one tick of the run status sensor.

            On the first tick, or when the stored cursor is not in a valid
            format, the cursor is initialized and a ``SkipReason`` is yielded.
            Otherwise up to 5 events of the type mapped from
            ``pipeline_run_status`` are fetched after the cursor. For each
            event whose run belongs to the current repository (and to
            ``pipeline_selection`` when one is given), ``run_status_sensor_fn``
            is invoked and the cursor is advanced.

            Yields:
                A ``SkipReason`` when the cursor is (re)initialized; otherwise
                one ``PipelineRunReaction`` per user-function invocation,
                carrying the serialized error when that invocation raised.
            """
            # initiate the cursor to (most recent event id, current timestamp) when:
            # * it's the first time starting the sensor
            # * or, the cursor isn't in valid format (backcompat)
            if context.cursor is None or not RunStatusSensorCursor.is_valid(
                    context.cursor):
                most_recent_event_records = list(
                    context.instance.get_event_records(ascending=False,
                                                       limit=1))
                # -1 is used as the record id when no event record was returned.
                most_recent_event_id = (most_recent_event_records[0].storage_id
                                        if len(most_recent_event_records) == 1
                                        else -1)

                new_cursor = RunStatusSensorCursor(
                    update_timestamp=pendulum.now("UTC").isoformat(),
                    record_id=most_recent_event_id,
                )
                context.update_cursor(new_cursor.to_json())
                yield SkipReason(
                    f"Initiating {name}. Set cursor to {new_cursor}")
                return

            record_id, update_timestamp = RunStatusSensorCursor.from_json(
                context.cursor)

            # Fetch events after the cursor id
            # * we move the cursor forward to the latest visited event's id to avoid revisits
            # * when the daemon is down, bc we persist the cursor info, we can go back to where we
            #   left and backfill alerts for the qualified events (up to 5 at a time) during the downtime
            # Note: this is a cross-run query which requires extra handling in sqlite, see details in SqliteEventLogStorage.
            event_records = context.instance.get_event_records(
                EventRecordsFilter(
                    after_cursor=RunShardedEventsCursor(
                        id=record_id,
                        run_updated_after=pendulum.parse(update_timestamp)),
                    event_type=PIPELINE_RUN_STATUS_TO_EVENT_TYPE[
                        pipeline_run_status],
                ),
                ascending=True,
                limit=5,
            )

            for event_record in event_records:
                event_log_entry = event_record.event_log_entry
                storage_id = event_record.storage_id

                # get run info
                run_records = context.instance.get_run_records(
                    filters=PipelineRunsFilter(
                        run_ids=[event_log_entry.run_id]))
                # Exactly one run record is expected for the event's run id.
                check.invariant(len(run_records) == 1)
                pipeline_run = run_records[0].pipeline_run
                # NOTE: rebinds update_timestamp (until now the value read from
                # the cursor) to the run record's timestamp, which is what gets
                # persisted in the cursor updates below.
                update_timestamp = run_records[0].update_timestamp

                # skip if any of the following happens:
                if (
                        # the pipeline does not have a repository (manually executed)
                        not pipeline_run.external_pipeline_origin or
                        # the pipeline does not belong to the current repository
                        pipeline_run.external_pipeline_origin.
                        external_repository_origin.repository_name !=
                        context.repository_name or
                        # if pipeline is not selected
                    (pipeline_selection
                     and pipeline_run.pipeline_name not in pipeline_selection
                     )):
                    # Still advance the cursor so the skipped event is not revisited.
                    context.update_cursor(
                        RunStatusSensorCursor(
                            record_id=storage_id,
                            update_timestamp=update_timestamp.isoformat()).
                        to_json())
                    continue

                serializable_error = None

                try:
                    with user_code_error_boundary(
                            RunStatusSensorExecutionError,
                            lambda:
                            f'Error occurred during the execution sensor "{name}".',
                    ):
                        # one user code invocation maps to one failure event
                        run_status_sensor_fn(
                            RunStatusSensorContext(
                                sensor_name=name,
                                pipeline_run=pipeline_run,
                                dagster_event=event_log_entry.dagster_event,
                            ))
                except RunStatusSensorExecutionError as run_status_sensor_execution_error:
                    # When the user code errors, we report error to the sensor tick not the original run.
                    serializable_error = serializable_error_info_from_exc_info(
                        run_status_sensor_execution_error.original_exc_info)

                # The cursor is advanced whether or not the user code raised,
                # and before the reaction is yielded.
                context.update_cursor(
                    RunStatusSensorCursor(record_id=storage_id,
                                          update_timestamp=update_timestamp.
                                          isoformat()).to_json())

                # Yield PipelineRunReaction to indicate the execution success/failure.
                # The sensor machinery would
                # * report back to the original run if success
                # * update cursor and job state
                yield PipelineRunReaction(
                    pipeline_run=pipeline_run,
                    error=serializable_error,
                )