def test_get_run_times_for_backfill_job(db):
    """Check the run times ``get_run_times`` yields for a backfill job spec.

    Time is frozen at 12:00:00 today so that every expected run time can be
    written relative to ``now``.
    """
    now = timezone.now().replace(hour=12, minute=0, second=0, microsecond=0)
    one_hour = datetime.timedelta(hours=1)

    job_spec = {
        'cmd': 'fakejob',
        'backfill': True,
        'frequency': '1h',
    }

    def run_times_since(last_success):
        # Reads job_spec at call time, so later mutations of the spec are
        # picked up by subsequent calls.
        return list(get_run_times(job_spec, last_success=last_success))

    with freezegun.freeze_time(now, tz_offset=0):
        # backfill=True and the job has never run: yield [now]
        assert run_times_since(None) == [now]

        # frequency without a time, last success one hour ago: yield [now]
        assert run_times_since(now - one_hour) == [now]

        # frequency without a time, last success three hours ago: yield
        # [2 hours ago, 1 hour ago, now]
        assert run_times_since(now - 3 * one_hour) == [
            now - 2 * one_hour,
            now - one_hour,
            now,
        ]

        # frequency and time: daily at 10:30, last success 24 hours ago
        job_spec['frequency'] = '1d'
        job_spec['time'] = '10:30'
        ten_thirty_today = datetime.datetime(
            now.year, now.month, now.day, 10, 30, 0, 0, tzinfo=now.tzinfo
        )
        assert run_times_since(now - 24 * one_hour) == [ten_thirty_today]
def test_get_run_times(db):
    """A job spec with backfill disabled yields only the current time."""
    frozen_now = timezone.now()
    spec = {"cmd": "fakejob", "backfill": False}

    with freezegun.freeze_time(frozen_now, tz_offset=0):
        # With backfill=False, the only run time is [now]
        run_times = list(get_run_times(spec, last_success=frozen_now))
        assert run_times == [frozen_now]
def test_get_run_times(db):
    """A job spec with backfill disabled yields only the current time."""
    frozen_now = timezone.now()
    spec = dict([('cmd', 'fakejob'), ('backfill', False)])

    with freezegun.freeze_time(frozen_now, tz_offset=0):
        # With backfill=False, the only run time is [now]
        run_times = list(get_run_times(spec, last_success=frozen_now))
        assert run_times == [frozen_now]
def _run_one(self, job_spec, force=False, cmd_args=None):
    """Run a single job.

    :arg job_spec: job spec dict
    :arg force: forces the job to run even if it's not time to run
    :arg cmd_args: list of "--key=val" positional args as you would pass
        them on a command line

    :returns: the result of ``self._run_job`` when ``force`` is true;
        ``None`` otherwise

    :raises OngoingJobError: re-raised as-is, and the run is not logged

    Any other ``Exception`` raised while running is logged and recorded via
    ``self._remember_failure`` rather than propagated.

    """
    cmd_args = cmd_args or []
    cmd = job_spec['cmd']

    # Make sure we have a job record before trying to run anything
    job = Job.objects.get_or_create(app_name=cmd)[0]

    if force:
        # If we're forcing the job, just run it without the bookkeeping.
        return self._run_job(job_spec, *cmd_args)

    # Figure out whether this job should be run now
    seconds = convert_frequency(
        job_spec.get('frequency', DEFAULT_FREQUENCY))
    if not time_to_run(job_spec, job):
        logger.info("skipping %s: not time to run", cmd)
        return

    logger.info('about to run %s', cmd)

    now = timezone.now()
    log_run = True
    exc_type = exc_value = exc_tb = None
    start_time = None
    run_time = None

    with self.lock_job(job_spec['cmd']):
        try:
            cmd_kwargs = {}
            last_success = job.last_success

            # Backfill jobs can have multiple run-times, so we iterate
            # through all possible ones until either we get them all done
            # or it dies
            for run_time in get_run_times(job_spec, job.last_success):
                if job_spec.get('backfill', False):
                    # If "backfill" is in the spec, then we want to pass in
                    # run_time as an argument
                    cmd_kwargs['run_time'] = format_datetime(run_time)

                if job_spec.get('last_success', False):
                    # If "last_success" is in the spec, we want to pass in
                    # the last_success as an argument
                    cmd_kwargs['last_success'] = format_datetime(
                        last_success)

                logger.info('running: %s %s %s', cmd, cmd_args, cmd_kwargs)

                start_time = time.time()
                self._run_job(job_spec, *cmd_args, **cmd_kwargs)
                end_time = time.time()

                logger.info('successfully ran %s on %s', cmd, run_time)
                last_success = run_time
                self._remember_success(cmd, last_success,
                                       end_time - start_time)

        except OngoingJobError:
            # Don't record a run for a job that is already in progress
            log_run = False
            raise

        except Exception:
            end_time = time.time()
            # The failure may have happened before any job was started
            # (e.g. while computing run times), in which case start_time is
            # still None; report a zero duration instead of raising
            # TypeError and masking the original error.
            if start_time is None:
                duration = 0.0
            else:
                duration = end_time - start_time

            exc_type, exc_value, exc_tb = sys.exc_info()
            single_line_tb = (
                ''.join(
                    traceback.format_exception(exc_type, exc_value, exc_tb)
                ).replace('\n', '\\n')
            )

            logger.error('error when running %s (%s): %s',
                         cmd, run_time, single_line_tb)
            self._remember_failure(cmd, duration,
                                   exc_type, exc_value, exc_tb)

        finally:
            if log_run:
                self._log_run(cmd, seconds, job_spec.get('time'),
                              run_time, now,
                              exc_type, exc_value, exc_tb)
def _run_one(self, job_spec, force=False, cmd_args=None):
    """Run a single job.

    :arg job_spec: job spec dict
    :arg force: forces the job to run even if it's not time to run
    :arg cmd_args: list of "--key=val" positional args as you would pass
        them on a command line

    :returns: the result of ``self._run_job`` when ``force`` is true;
        ``None`` otherwise

    :raises OngoingJobError: re-raised as-is, and the run is not logged

    Each successful run time is logged as soon as it finishes. Any other
    ``Exception`` raised while running is logged and recorded via
    ``self._remember_failure`` rather than propagated.

    """
    cmd_args = cmd_args or []
    cmd = job_spec["cmd"]

    # Make sure we have a job record before trying to run anything
    job = Job.objects.get_or_create(app_name=cmd)[0]

    if force:
        # If we're forcing the job, just run it without the bookkeeping.
        return self._run_job(job_spec, *cmd_args)

    # Figure out whether this job should be run now
    seconds = convert_frequency(
        job_spec.get("frequency", DEFAULT_FREQUENCY))
    if not time_to_run(job_spec, job):
        logger.info("skipping %s: not time to run", cmd)
        return

    logger.info("about to run %s", cmd)

    now = timezone.now()
    start_time = None
    run_time = None

    with self.lock_job(job_spec["cmd"]):
        try:
            cmd_kwargs = {}
            last_success = job.last_success

            # Backfill jobs can have multiple run-times, so we iterate
            # through all possible ones until either we get them all done
            # or it dies
            for run_time in get_run_times(job_spec, job.last_success):
                if job_spec.get("backfill", False):
                    # If "backfill" is in the spec, then we want to pass in
                    # run_time as an argument
                    cmd_kwargs["run_time"] = format_datetime(run_time)

                if job_spec.get("last_success", False):
                    # If "last_success" is in the spec, we want to pass in
                    # the last_success as an argument
                    cmd_kwargs["last_success"] = format_datetime(
                        last_success)

                logger.info("running: %s %s %s", cmd, cmd_args, cmd_kwargs)

                start_time = time.time()
                self._run_job(job_spec, *cmd_args, **cmd_kwargs)
                end_time = time.time()

                logger.info("successfully ran %s on %s", cmd, run_time)
                last_success = run_time
                self._remember_success(cmd, last_success,
                                       end_time - start_time)

                # Log each backfill task as a successful completion so that
                # if one of them fails, we start at the failure date rather
                # than all the way back at the beginning.
                self._log_run(
                    cmd,
                    seconds,
                    job_spec.get("time"),
                    run_time,
                    now,
                )

        except OngoingJobError:
            # Catch and raise this so it doesn't get handled by the
            # Exception handling
            raise

        except Exception:
            end_time = time.time()
            # The failure may have happened before any job was started
            # (e.g. while computing run times), in which case start_time is
            # still None; report a zero duration instead of raising
            # TypeError, which would mask the original error and skip the
            # failure bookkeeping below.
            if start_time is None:
                duration = 0.0
            else:
                duration = end_time - start_time

            exc_type, exc_value, exc_tb = sys.exc_info()
            single_line_tb = "".join(
                traceback.format_exception(exc_type, exc_value, exc_tb)
            ).replace("\n", "\\n")

            logger.error("error when running %s (%s): %s",
                         cmd, run_time, single_line_tb)
            self._remember_failure(cmd, duration,
                                   exc_type, exc_value, exc_tb)
            self._log_run(
                cmd,
                seconds,
                job_spec.get("time"),
                run_time,
                now,
                exc_type,
                exc_value,
                exc_tb,
            )
def _run_one(self, job_spec, force=False, cmd_args=None):
    """Run a single job.

    :arg job_spec: job spec dict
    :arg force: forces the job to run even if it's not time to run
    :arg cmd_args: list of "--key=val" positional args as you would pass
        them on a command line

    :returns: the result of ``self._run_job`` when ``force`` is true;
        ``None`` otherwise

    :raises OngoingJobError: re-raised as-is, and the run is not logged

    Any other ``Exception`` raised while running is sent to
    ``capture_error``, logged, and recorded via ``self._remember_failure``
    rather than propagated.

    """
    cmd_args = cmd_args or []
    cmd = job_spec['cmd']

    # Make sure we have a job record before trying to run anything
    job = Job.objects.get_or_create(app_name=cmd)[0]

    if force:
        # If we're forcing the job, just run it without the bookkeeping.
        return self._run_job(job_spec, *cmd_args)

    # Figure out whether this job should be run now
    seconds = convert_frequency(job_spec.get('frequency', DEFAULT_FREQUENCY))
    if not time_to_run(job_spec, job):
        logger.info("skipping %s: not time to run", cmd)
        return

    logger.info('about to run %s', cmd)

    now = timezone.now()
    log_run = True
    exc_type = exc_value = exc_tb = None
    start_time = None
    run_time = None

    with self.lock_job(job_spec['cmd']):
        try:
            cmd_kwargs = {}
            last_success = job.last_success

            # Backfill jobs can have multiple run-times, so we iterate
            # through all possible ones until either we get them all done
            # or it dies
            for run_time in get_run_times(job_spec, job.last_success):
                if job_spec.get('backfill', False):
                    # If "backfill" is in the spec, then we want to pass in
                    # run_time as an argument
                    cmd_kwargs['run_time'] = format_datetime(run_time)

                if job_spec.get('last_success', False):
                    # If "last_success" is in the spec, we want to pass in
                    # the last_success as an argument
                    cmd_kwargs['last_success'] = format_datetime(last_success)

                logger.info('running: %s %s %s', cmd, cmd_args, cmd_kwargs)

                start_time = time.time()
                self._run_job(job_spec, *cmd_args, **cmd_kwargs)
                end_time = time.time()

                logger.info('successfully ran %s on %s', cmd, run_time)
                last_success = run_time
                self._remember_success(cmd, last_success, end_time - start_time)

        except OngoingJobError:
            # Don't record a run for a job that is already in progress
            log_run = False
            raise

        except Exception:
            end_time = time.time()
            # The failure may have happened before any job was started
            # (e.g. while computing run times), in which case start_time is
            # still None; report a zero duration instead of raising
            # TypeError and masking the original error.
            if start_time is None:
                duration = 0.0
            else:
                duration = end_time - start_time

            exc_type, exc_value, exc_tb = sys.exc_info()
            single_line_tb = (
                ''.join(traceback.format_exception(exc_type, exc_value, exc_tb))
                .replace('\n', '\\n')
            )

            # Send error to sentry, log it, and remember the failure
            capture_error(settings.SENTRY_DSN)
            logger.error('error when running %s (%s): %s', cmd, run_time, single_line_tb)
            self._remember_failure(
                cmd,
                duration,
                exc_type,
                exc_value,
                exc_tb
            )

        finally:
            if log_run:
                self._log_run(
                    cmd,
                    seconds,
                    job_spec.get('time'),
                    run_time,
                    now,
                    exc_type,
                    exc_value,
                    exc_tb
                )