示例#1
0
    def _run_one(self, job_spec, force=False, cmd_args=None):
        """Run a single job.

        :arg job_spec: job spec dict
        :arg force: forces the job to run even if it's not time to run
        :arg cmd_args: list of "--key=val" positional args as you would pass
            them on a command line

        :returns: the result of ``self._run_job`` when ``force`` is true,
            otherwise ``None``

        :raises OngoingJobError: re-raised untouched and without logging the
            run; any other exception raised inside the locked section is
            logged and remembered as a failure instead of being propagated

        """
        cmd_args = cmd_args or []
        cmd = job_spec['cmd']

        # Make sure we have a job record before trying to run anything
        job = Job.objects.get_or_create(app_name=cmd)[0]

        if force:
            # If we're forcing the job, just run it without the bookkeeping.
            return self._run_job(job_spec, *cmd_args)

        # Figure out whether this job should be run now
        seconds = convert_frequency(
            job_spec.get('frequency', DEFAULT_FREQUENCY))
        if not time_to_run(job_spec, job):
            logger.info("skipping %s: not time to run", cmd)
            return

        logger.info('about to run %s', cmd)

        now = timezone.now()
        log_run = True
        exc_type = exc_value = exc_tb = None
        start_time = None
        run_time = None

        with self.lock_job(cmd):
            try:
                cmd_kwargs = {}
                last_success = job.last_success

                # Backfill jobs can have multiple run-times, so we iterate
                # through all possible ones until either we get them all done
                # or it dies
                for run_time in get_run_times(job_spec, job.last_success):
                    # Forget the previous iteration's start so a failure
                    # before this run begins is not timed from the old one.
                    start_time = None

                    if job_spec.get('backfill', False):
                        # If "backfill" is in the spec, then we want to pass in
                        # run_time as an argument
                        cmd_kwargs['run_time'] = format_datetime(run_time)

                    if job_spec.get('last_success', False):
                        # If "last_success" is in the spec, we want to pass in
                        # the last_success as an argument
                        cmd_kwargs['last_success'] = format_datetime(
                            last_success)

                    logger.info('running: %s %s %s', cmd, cmd_args, cmd_kwargs)

                    start_time = time.time()
                    self._run_job(job_spec, *cmd_args, **cmd_kwargs)
                    end_time = time.time()

                    logger.info('successfully ran %s on %s', cmd, run_time)
                    last_success = run_time
                    self._remember_success(cmd, last_success,
                                           end_time - start_time)

            except OngoingJobError:
                # Propagate as-is and skip the run log in the finally block.
                log_run = False
                raise

            except Exception:
                end_time = time.time()
                exc_type, exc_value, exc_tb = sys.exc_info()
                single_line_tb = ''.join(
                    traceback.format_exception(exc_type, exc_value, exc_tb)
                ).replace('\n', '\\n')

                # start_time is None when the failure happened before the job
                # was started (e.g. while computing the run times). Report a
                # zero duration instead of raising TypeError here, which would
                # mask the real error and skip remembering the failure.
                if start_time is None:
                    duration = 0.0
                else:
                    duration = end_time - start_time

                logger.error('error when running %s (%s): %s', cmd, run_time,
                             single_line_tb)
                self._remember_failure(cmd, duration, exc_type, exc_value,
                                       exc_tb)

            finally:
                # Logged once per invocation, with the last run_time attempted
                # and the exception details (all None on success).
                if log_run:
                    self._log_run(cmd, seconds, job_spec.get('time'), run_time,
                                  now, exc_type, exc_value, exc_tb)
示例#2
0
def test_convert_frequency_error():
    """convert_frequency must raise FrequencyDefinitionError for '1t'."""
    invalid_frequency = '1t'
    with pytest.raises(FrequencyDefinitionError):
        convert_frequency(invalid_frequency)
示例#3
0
    def _run_one(self, job_spec, force=False, cmd_args=None):
        """Run a single job.

        :arg job_spec: job spec dict
        :arg force: forces the job to run even if it's not time to run
        :arg cmd_args: list of "--key=val" positional args as you would pass
            them on a command line

        :returns: the result of ``self._run_job`` when ``force`` is true,
            otherwise ``None``

        :raises OngoingJobError: re-raised untouched and without logging the
            run; any other exception raised inside the locked section is
            logged and remembered as a failure instead of being propagated

        """
        cmd_args = cmd_args or []
        cmd = job_spec["cmd"]

        # Make sure we have a job record before trying to run anything
        job = Job.objects.get_or_create(app_name=cmd)[0]

        if force:
            # If we're forcing the job, just run it without the bookkeeping.
            return self._run_job(job_spec, *cmd_args)

        # Figure out whether this job should be run now
        seconds = convert_frequency(
            job_spec.get("frequency", DEFAULT_FREQUENCY))
        if not time_to_run(job_spec, job):
            logger.info("skipping %s: not time to run", cmd)
            return

        logger.info("about to run %s", cmd)

        now = timezone.now()
        start_time = None
        run_time = None

        with self.lock_job(cmd):
            try:
                cmd_kwargs = {}
                last_success = job.last_success

                # Backfill jobs can have multiple run-times, so we iterate
                # through all possible ones until either we get them all done
                # or it dies
                for run_time in get_run_times(job_spec, job.last_success):
                    # Forget the previous iteration's start so a failure
                    # before this run begins is not timed from the old one.
                    start_time = None

                    if job_spec.get("backfill", False):
                        # If "backfill" is in the spec, then we want to pass in
                        # run_time as an argument
                        cmd_kwargs["run_time"] = format_datetime(run_time)

                    if job_spec.get("last_success", False):
                        # If "last_success" is in the spec, we want to pass in
                        # the last_success as an argument
                        cmd_kwargs["last_success"] = format_datetime(
                            last_success)

                    logger.info("running: %s %s %s", cmd, cmd_args, cmd_kwargs)

                    start_time = time.time()
                    self._run_job(job_spec, *cmd_args, **cmd_kwargs)
                    end_time = time.time()

                    logger.info("successfully ran %s on %s", cmd, run_time)
                    last_success = run_time

                    self._remember_success(cmd, last_success,
                                           end_time - start_time)

                    # Log each backfill task as a successful completion so that if
                    # one of them fails, we start at the failure date rather than
                    # all the way back at the beginning.
                    self._log_run(
                        cmd,
                        seconds,
                        job_spec.get("time"),
                        run_time,
                        now,
                    )

            except OngoingJobError:
                # Catch and raise this so it doesn't get handled by the Exception
                # handling
                raise

            except Exception:
                end_time = time.time()
                exc_type, exc_value, exc_tb = sys.exc_info()
                single_line_tb = "".join(
                    traceback.format_exception(exc_type, exc_value, exc_tb)
                ).replace("\n", "\\n")

                # start_time is None when the failure happened before the job
                # was started (e.g. while computing the run times). Report a
                # zero duration instead of raising TypeError here, which would
                # mask the real error and skip both failure records below.
                if start_time is None:
                    duration = 0.0
                else:
                    duration = end_time - start_time

                logger.error("error when running %s (%s): %s", cmd, run_time,
                             single_line_tb)
                self._remember_failure(cmd, duration, exc_type, exc_value,
                                       exc_tb)
                self._log_run(
                    cmd,
                    seconds,
                    job_spec.get("time"),
                    run_time,
                    now,
                    exc_type,
                    exc_value,
                    exc_tb,
                )
示例#4
0
def test_convert_frequency_good(freq, expected):
    """convert_frequency(freq) must equal the expected value."""
    converted = convert_frequency(freq)
    assert converted == expected
示例#5
0
    def _run_one(self, job_spec, force=False, cmd_args=None):
        """Run a single job.

        :arg job_spec: job spec dict
        :arg force: forces the job to run even if it's not time to run
        :arg cmd_args: list of "--key=val" positional args as you would pass
            them on a command line

        :returns: the result of ``self._run_job`` when ``force`` is true,
            otherwise ``None``

        :raises OngoingJobError: re-raised untouched and without logging the
            run; any other exception raised inside the locked section is
            passed to ``capture_error``, logged and remembered as a failure
            instead of being propagated

        """
        cmd_args = cmd_args or []
        cmd = job_spec['cmd']

        # Make sure we have a job record before trying to run anything
        job = Job.objects.get_or_create(app_name=cmd)[0]

        if force:
            # If we're forcing the job, just run it without the bookkeeping.
            return self._run_job(job_spec, *cmd_args)

        # Figure out whether this job should be run now
        seconds = convert_frequency(job_spec.get('frequency', DEFAULT_FREQUENCY))
        if not time_to_run(job_spec, job):
            logger.info("skipping %s: not time to run", cmd)
            return

        logger.info('about to run %s', cmd)

        now = timezone.now()
        log_run = True
        exc_type = exc_value = exc_tb = None
        start_time = None
        run_time = None

        with self.lock_job(cmd):
            try:
                cmd_kwargs = {}
                last_success = job.last_success

                # Backfill jobs can have multiple run-times, so we iterate
                # through all possible ones until either we get them all done
                # or it dies
                for run_time in get_run_times(job_spec, job.last_success):
                    # Forget the previous iteration's start so a failure
                    # before this run begins is not timed from the old one.
                    start_time = None

                    if job_spec.get('backfill', False):
                        # If "backfill" is in the spec, then we want to pass in
                        # run_time as an argument
                        cmd_kwargs['run_time'] = format_datetime(run_time)

                    if job_spec.get('last_success', False):
                        # If "last_success" is in the spec, we want to pass in
                        # the last_success as an argument
                        cmd_kwargs['last_success'] = format_datetime(last_success)

                    logger.info('running: %s %s %s', cmd, cmd_args, cmd_kwargs)

                    start_time = time.time()
                    self._run_job(job_spec, *cmd_args, **cmd_kwargs)
                    end_time = time.time()

                    logger.info('successfully ran %s on %s', cmd, run_time)
                    last_success = run_time
                    self._remember_success(cmd, last_success, end_time - start_time)

            except OngoingJobError:
                # Propagate as-is and skip the run log in the finally block.
                log_run = False
                raise

            except Exception:
                end_time = time.time()
                exc_type, exc_value, exc_tb = sys.exc_info()
                single_line_tb = (
                    ''.join(traceback.format_exception(exc_type, exc_value, exc_tb))
                    .replace('\n', '\\n')
                )

                # start_time is None when the failure happened before the job
                # was started (e.g. while computing the run times). Report a
                # zero duration instead of raising TypeError here, which would
                # mask the real error and skip remembering the failure.
                if start_time is None:
                    duration = 0.0
                else:
                    duration = end_time - start_time

                # Send error to sentry, log it, and remember the failure
                capture_error(settings.SENTRY_DSN)
                logger.error('error when running %s (%s): %s', cmd, run_time, single_line_tb)
                self._remember_failure(
                    cmd,
                    duration,
                    exc_type,
                    exc_value,
                    exc_tb
                )

            finally:
                # Logged once per invocation, with the last run_time attempted
                # and the exception details (all None on success).
                if log_run:
                    self._log_run(
                        cmd,
                        seconds,
                        job_spec.get('time'),
                        run_time,
                        now,
                        exc_type, exc_value, exc_tb
                    )