def _gen_base_attributes(item_id=None):
    """Build the bookkeeping attributes stored on a new record.

    A freshly generated UUID string is placed under 'id' only when the
    caller did not supply ``item_id``. 'created_at' and 'updated_at' are
    always set from ``timeutils.utcnow()``.

    Returns a deep copy of the attribute dict.
    """
    attributes = {}
    if item_id is None:
        attributes['id'] = str(uuid.uuid4())
    # One utcnow() call per field, created_at first.
    for stamp_key in ('created_at', 'updated_at'):
        attributes[stamp_key] = timeutils.utcnow()
    return copy.deepcopy(attributes)
def create(self, request, body):
    """Create a job from the schedule referenced in the request body.

    :param request: the incoming request (not read by this method)
    :param body: dict expected to contain ``{'job': {'schedule_id': ...}}``;
                 ``job['next_run']`` is optional and, when truthy, is
                 parsed as an ISO time and used as the expected next run.
    :returns: ``{'job': <created job>}``
    :raises webob.exc.HTTPBadRequest: body, 'job' or 'schedule_id' missing
    :raises webob.exc.HTTPNotFound: the schedule lookup raised NotFound
    :raises webob.exc.HTTPConflict: the test-and-set of next_run raised
                                    NotFound
    """
    if (body is None or body.get('job') is None or
            body['job'].get('schedule_id') is None):
        raise webob.exc.HTTPBadRequest()
    job = body['job']

    try:
        schedule = self.db_api.schedule_get_by_id(job['schedule_id'])
    except exception.NotFound:
        raise webob.exc.HTTPNotFound()

    # Check integrity of schedule and update next run
    expected_next_run = job.get('next_run')
    if expected_next_run:
        expected_next_run = timeutils.parse_isotime(expected_next_run)

    next_run = api_utils.schedule_to_next_run(schedule, timeutils.utcnow())
    try:
        self.db_api.schedule_test_and_set_next_run(
            schedule['id'], expected_next_run, next_run)
    except exception.NotFound:
        # The literals are concatenated, so each continuation needs its
        # own leading space (the original rendered "changedor").
        msg = _("Specified next run does not match the current next run"
                " value. This could mean schedule has either changed"
                " or has already been scheduled since you last expected.")
        raise webob.exc.HTTPConflict(explanation=msg)

    # Update schedule last_scheduled
    self.db_api.schedule_update(
        schedule['id'], {'last_scheduled': timeutils.utcnow()})

    # Create job: start from the caller-supplied values, then force the
    # fields that must come from the schedule.
    values = {}
    values.update(job)
    values['tenant'] = schedule['tenant']
    values['action'] = schedule['action']
    values['status'] = 'QUEUED'
    values['job_metadata'] = [
        {'key': metadata['key'], 'value': metadata['value']}
        for metadata in schedule['schedule_metadata']
    ]

    job_action = values['action']
    if 'timeout' not in values:
        # Both timeouts default together when the caller gave no timeout.
        values['timeout'] = api_utils.get_new_timeout_by_action(job_action)
        values['hard_timeout'] = \
            api_utils.get_new_timeout_by_action(job_action)

    job = self.db_api.job_create(values)
    utils.serialize_datetimes(job)
    api_utils.serialize_job_metadata(job)
    job = {'job': job}
    utils.generate_notification(None, 'qonos.job.create', job, 'INFO')
    return job
def test_schedule_test_and_set_next_run_invalid(self):
    """A non-matching expected next_run must raise NotFound."""
    schedule_values = dict(
        id=str(uuid.uuid4()),
        tenant=str(uuid.uuid4()),
        action='snapshot',
        minute=30,
        hour=2,
    )
    # Captured before the clock is advanced and the schedule is created.
    bad_expected_next_run = timeutils.utcnow()
    timeutils.advance_time_seconds(10)
    schedule = self.db_api.schedule_create(schedule_values)

    test_and_set = self.db_api.schedule_test_and_set_next_run
    self.assertRaises(exception.NotFound,
                      test_and_set,
                      schedule['id'],
                      bad_expected_next_run,
                      timeutils.utcnow())
def job_get_and_assign_next_by_action(action, worker_id, new_timeout):
    """Assign the next available job for ``action`` to ``worker_id``.

    This must be an atomic action!

    A job is eligible when its action matches, its status is not one of
    the finished statuses below, and it either has no worker or its
    timeout is at or before the current minute.

    Returns a deep copy of the updated job with 'job_metadata' attached,
    or None when no job is eligible.
    """
    # Seconds and microseconds are dropped before comparing timeouts.
    now = timeutils.utcnow().replace(second=0, microsecond=0)
    finished_statuses = ['DONE', 'CANCELLED', 'HARD_TIMED_OUT',
                         'MAX_RETRIED']

    def _is_assignable(candidate):
        if candidate['action'] != action:
            return False
        if candidate['status'] in finished_statuses:
            return False
        return candidate['worker_id'] is None or candidate['timeout'] <= now

    job_ref = next(
        (candidate for candidate in _jobs_get_sorted()
         if _is_assignable(candidate)),
        None)
    if job_ref is None:
        return None

    job_id = job_ref['id']
    stored = DATA['jobs'][job_id]
    stored['worker_id'] = worker_id
    stored['timeout'] = new_timeout
    stored['retry_count'] = job_ref['retry_count'] + 1
    stored['version_id'] = str(uuid.uuid4())

    assigned = copy.deepcopy(stored)
    assigned['job_metadata'] = job_meta_get_all_by_job_id(job_id)
    return assigned
def test_cron_string_to_datetime(self):
    """The computed next run must lie after the current time."""
    # Use the previous minute and hour, wrapping around at zero.
    current_minute = timeutils.utcnow().minute
    minute = 59 if current_minute == 0 else current_minute - 1

    current_hour = timeutils.utcnow().hour
    hour = 23 if current_hour == 0 else current_hour - 1

    next_run = utils.cron_string_to_next_datetime(minute=minute, hour=hour)
    self.assertTrue(next_run > timeutils.utcnow())
def test_create_zero_hour(self):
    """Creating a schedule with hour 0 reports the next 00:30 run."""
    hour = 0
    expected = {
        'id': unit_utils.SCHEDULE_UUID5,
        'tenant': unit_utils.TENANT1,
        'action': 'snapshot',
        'minute': 30,
        'hour': hour,
    }
    fixture = {'schedule': expected}
    request = unit_utils.get_fake_request(method='POST')

    actual = self.controller.create(request, fixture)['schedule']

    for generated_key in ('id', 'created_at', 'updated_at'):
        self.assertNotEqual(actual.get(generated_key), None)

    now = timeutils.utcnow()
    still_due_today = now.hour == hour and now.minute < 30
    if not still_due_today:
        # Today's 00:30 has passed, so the next run is tomorrow.
        now = now + datetime.timedelta(days=1)
    expected_next_run = timeutils.isotime(
        now.replace(hour=hour, minute=30, second=0, microsecond=0))
    self.assertEqual(expected_next_run, actual['next_run'])

    for echoed_key in ('tenant', 'action', 'minute', 'hour'):
        self.assertEqual(expected[echoed_key], actual[echoed_key])
def job_update(job_id, job_values):
    """Update a stored job and, optionally, replace its metadata.

    ``job_values`` is copied, so the caller's dict is not modified. A
    'job_metadata' entry is split off; when it is not None the job's
    existing metadata is replaced by it.

    Returns the result of ``job_get_by_id(job_id)``.
    Raises exception.NotFound when ``job_id`` is not stored.
    """
    values = job_values.copy()

    if job_id not in DATA['jobs']:
        raise exception.NotFound()

    metadata = values.pop('job_metadata', None)

    if values:
        job = DATA['jobs'][job_id]
        #NOTE(ameade): This must come before update specified values since
        # we may be trying to manually set updated_at
        job['updated_at'] = timeutils.utcnow()
        job['version_id'] = str(uuid.uuid4())
        job.update(values)

    if metadata is not None:
        DATA['job_metadata'][job_id] = {}
        for metadatum in metadata:
            job_meta_create(job_id, metadatum)

    return job_get_by_id(job_id)
def _create_jobs(self):
    """Create two snapshot jobs and keep them as job_1 and job_2."""
    now = timeutils.utcnow()
    # Fields that are identical for both jobs.
    shared = {
        'action': 'snapshot',
        'tenant': unit_utils.TENANT1,
        'timeout': now + datetime.timedelta(hours=1),
        'hard_timeout': now + datetime.timedelta(hours=4),
        'retry_count': 0,
    }

    first = dict(shared,
                 id=unit_utils.JOB_UUID1,
                 schedule_id=unit_utils.SCHEDULE_UUID1,
                 worker_id=unit_utils.WORKER_UUID1,
                 status='queued')
    self.job_1 = self.db_api.job_create(first)

    second = dict(shared,
                  id=unit_utils.JOB_UUID2,
                  schedule_id=unit_utils.SCHEDULE_UUID2,
                  worker_id=unit_utils.WORKER_UUID2,
                  status='error')
    self.job_2 = self.db_api.job_create(second)
def job_get_and_assign_next_by_action(action, worker_id, max_retry,
                                      new_timeout):
    """Assign the next available job for ``action`` to ``worker_id``.

    This must be an atomic action!

    A job is eligible when its action matches, its retry_count is below
    ``max_retry``, its hard_timeout is still in the future, its status is
    neither 'DONE' nor 'CANCELLED', and it either has no worker or its
    timeout has been reached.

    Returns a deep copy of the updated job with 'job_metadata' attached,
    or None when no job is eligible.
    """
    now = timeutils.utcnow()

    def _is_assignable(candidate):
        # Checks run in the same order as the original compound condition.
        if candidate['action'] != action:
            return False
        if not candidate['retry_count'] < max_retry:
            return False
        if not candidate['hard_timeout'] > now:
            return False
        if candidate['status'] in ['DONE', 'CANCELLED']:
            return False
        return candidate['worker_id'] is None or candidate['timeout'] <= now

    job_ref = next(
        (candidate for candidate in _jobs_get_sorted()
         if _is_assignable(candidate)),
        None)
    if job_ref is None:
        return None

    job_id = job_ref['id']
    stored = DATA['jobs'][job_id]
    stored['worker_id'] = worker_id
    stored['timeout'] = new_timeout
    stored['retry_count'] = job_ref['retry_count'] + 1
    stored['version_id'] = str(uuid.uuid4())

    assigned = copy.deepcopy(stored)
    assigned['job_metadata'] = job_meta_get_all_by_job_id(job_id)
    return assigned
def test_get_next_job_assigned_once_due_to_timeout(self):
    """A second request for the same action must return no job."""
    now = timeutils.utcnow()
    new_timeout = now + datetime.timedelta(hours=3)
    job_fixture = {
        'action': 'snapshot',
        'tenant': unit_utils.TENANT1,
        'schedule_id': unit_utils.SCHEDULE_UUID2,
        'worker_id': unit_utils.WORKER_UUID2,
        'status': 'queued',
        # The soft timeout is already an hour in the past.
        'timeout': now - datetime.timedelta(hours=1),
        'hard_timeout': now + datetime.timedelta(hours=4),
        'retry_count': 0,
    }
    retries = 2
    self._create_jobs(10, job_fixture)

    # First request; its result is not inspected by this test.
    db_api.job_get_and_assign_next_by_action(
        'snapshot', unit_utils.WORKER_UUID1, retries, new_timeout)
    second_job = db_api.job_get_and_assign_next_by_action(
        'snapshot', unit_utils.WORKER_UUID1, retries, new_timeout)

    self.assertEqual(second_job, None)
def _create_jobs(self, gap, *fixtures):
    """Create one job per fixture, advancing the clock after each.

    The created jobs are stored, in order, on ``self.jobs``.

    Returns the time read before the first job was created.
    """
    started_at = timeutils.utcnow()
    self.jobs = []
    for fixture in fixtures:
        created = self.db_api.job_create(fixture)
        self.jobs.append(created)
        timeutils.advance_time_seconds(gap)
    return started_at
def job_get_and_assign_next_by_action(action, worker_id, max_retry,
                                      new_timeout):
    """Assign the next available job for ``action`` to ``worker_id``.

    This must be an atomic action!

    Jobs are scanned in the order given by ``_jobs_get_sorted()`` and the
    first eligible one is assigned: its worker_id and timeout are
    replaced and its retry_count is incremented.

    Returns a deep copy of the updated job with 'job_metadata' attached,
    or None when no job is eligible.
    """
    now = timeutils.utcnow()
    job_ref = None
    for candidate in _jobs_get_sorted():
        # Guard clauses, in the same order as the original condition.
        if candidate['action'] != action:
            continue
        if not candidate['retry_count'] < max_retry:
            continue
        if not candidate['hard_timeout'] > now:
            continue
        if candidate['status'] in ['DONE', 'CANCELLED']:
            continue
        if candidate['worker_id'] is None or candidate['timeout'] <= now:
            job_ref = candidate
            break

    if job_ref is None:
        return None

    job_id = job_ref['id']
    stored = DATA['jobs'][job_id]
    stored['worker_id'] = worker_id
    stored['timeout'] = new_timeout
    stored['retry_count'] = job_ref['retry_count'] + 1

    assigned = copy.deepcopy(stored)
    assigned['job_metadata'] = job_meta_get_all_by_job_id(job_id)
    return assigned
def job_get_and_assign_next_by_action(action, worker_id, max_retry,
                                      new_timeout):
    """Assign the next available job for ``action`` to ``worker_id``.

    The update is filtered on the job's id and on the ``updated_at``
    value that was read, so it matches no row if the job changed in
    between.

    Returns the job re-read by id, or None when no job is available, the
    update matched no row, or NoResultFound was raised.
    """
    now = timeutils.utcnow()
    session = get_session()
    job_ref = _job_get_next_by_action(session, now, action, max_retry)
    if not job_ref:
        return None

    # Make sure the job has not changed unexpectedly since
    # retrieving it
    try:
        unchanged_job = (session.query(models.Job)
                         .filter_by(id=job_ref['id'])
                         .filter_by(updated_at=job_ref['updated_at']))
        rows_updated = unchanged_job.update({
            'worker_id': worker_id,
            'timeout': new_timeout,
            'retry_count': job_ref['retry_count'] + 1,
        })
    except sa_orm.exc.NoResultFound:
        #In case the job was deleted during assignment return nothing
        return None

    if not rows_updated:
        return None
    return _job_get_by_id(job_ref['id'])
def job_get_and_assign_next_by_action(action, worker_id, max_retry,
                                      new_timeout):
    """Assign the next available job for ``action`` to ``worker_id``.

    Returns the job re-read by id, or None when no job is available, the
    update matched no row, or NoResultFound / StaleDataError was raised.
    """
    now = timeutils.utcnow()
    session = get_session()
    job_ref = _job_get_next_by_action(session, now, action, max_retry)
    if not job_ref:
        return None

    # Make sure the job has not changed unexpectedly since
    # retrieving it
    # NOTE(review): the update below filters on id only; presumably the
    # StaleDataError handling relies on model-level versioning - confirm.
    try:
        job_by_id = session.query(models.Job).filter_by(id=job_ref["id"])
        rows_updated = job_by_id.update({
            "worker_id": worker_id,
            "timeout": new_timeout,
            "retry_count": job_ref["retry_count"] + 1,
        })
    except (sa_orm.exc.NoResultFound, sa_orm.exc.StaleDataError):
        # NoResultFound: the job was deleted during assignment.
        # StaleDataError: the job was picked up by another transaction.
        return None

    if not rows_updated:
        return None
    return _job_get_by_id(job_ref["id"])
def test_create_zero_hour(self):
    """Creating a schedule with hour 0 reports the next 00:30 run."""
    hour = 0
    expected = {
        "id": unit_utils.SCHEDULE_UUID5,
        "tenant": unit_utils.TENANT1,
        "action": "snapshot",
        "minute": 30,
        "hour": hour,
    }
    fixture = {"schedule": expected}
    request = unit_utils.get_fake_request(method="POST")

    created = self.controller.create(request, fixture)
    actual = created["schedule"]

    # Server-generated fields must be present.
    for generated_key in ("id", "created_at", "updated_at"):
        self.assertNotEqual(actual.get(generated_key), None)

    now = timeutils.utcnow()
    if not (now.hour == hour and now.minute < 30):
        # Today's 00:30 is no longer ahead of us; expect tomorrow's.
        now = now + datetime.timedelta(days=1)
    next_run_at = now.replace(hour=hour, minute=30, second=0, microsecond=0)
    self.assertEqual(timeutils.isotime(next_run_at), actual["next_run"])

    # Submitted fields must be echoed back unchanged.
    for echoed_key in ("tenant", "action", "minute", "hour"):
        self.assertEqual(expected[echoed_key], actual[echoed_key])
def fake_next_datetime(min, h, dom, m, dow, start_time):
    """Stand-in that checks it received all-wildcard cron fields.

    Also records the call on ``self.called`` and checks that
    ``start_time`` equals the current ``timeutils.utcnow()``.
    """
    self.called = True
    for cron_field in (min, h, dom, m, dow):
        self.assertEqual(cron_field, '*')
    self.assertEqual(timeutils.utcnow(), start_time)
def get_new_timeout_by_action(action):
    """Return now plus the configured timeout for ``action``.

    The timeout is read from the 'action_<action>' config group, falling
    back to 'action_default' when that group is not in CONF.
    """
    now = timeutils.utcnow()
    action_group = 'action_' + action
    group = action_group if action_group in CONF else 'action_default'
    timeout = datetime.timedelta(seconds=CONF.get(group).timeout_seconds)
    return now + timeout
def _jobs_cleanup_hard_timed_out():
    """Delete every job whose hard_timeout is at or before now.

    Returns the value reported by the query's ``delete()`` call.
    """
    now = timeutils.utcnow()
    session = get_session()
    expired_jobs = session.query(models.Job).filter(
        models.Job.hard_timeout <= now)
    num_del = expired_jobs.delete()
    session.flush()
    return num_del
def test_process_job_should_update_status_and_timestamp(self):
    """Processing a job that reaches ACTIVE ends with a DONE update."""
    timeutils.set_time_override()
    base_time = timeutils.utcnow()
    # Offsets (minutes, seconds) from base_time returned by the clock.
    clock_offsets = [(0, 0), (0, 0), (0, 305), (60, 5), (60, 305)]
    time_seq = [
        base_time + datetime.timedelta(minutes=minutes, seconds=seconds)
        for minutes, seconds in clock_offsets
    ]
    timeutils.set_time_override_seq(time_seq)

    job = copy.deepcopy(self.job)
    job['timeout'] = base_time + datetime.timedelta(minutes=60)

    # Expected nova calls, recorded in order. Each expectation goes
    # through the full attribute chain so a fresh mock method is recorded.
    self.nova_client.servers.get(mox.IsA(str)).AndReturn(MockServer())
    self.nova_client.servers.create_image(
        mox.IsA(str), mox.IsA(str),
        self.snapshot_meta).AndReturn(IMAGE_ID)
    for image_status in ('QUEUED', 'SAVING', 'SAVING', 'ACTIVE'):
        self.nova_client.images.get(IMAGE_ID).AndReturn(
            MockImageStatus(image_status))

    mock_retention = MockRetention()
    self.nova_client.rax_scheduled_images_python_novaclient_ext.\
        get(mox.IsA(str)).AndReturn(mock_retention)

    # Expected worker updates and notifications.
    self._init_worker_mock()
    self.worker.update_job(fakes.JOB_ID, 'PROCESSING',
                           timeout=mox.IsA(datetime.datetime),
                           error_message=None)
    self.worker.update_job(fakes.JOB_ID, 'PROCESSING', timeout=None,
                           error_message=None)

    self.mox.StubOutWithMock(utils, 'generate_notification')
    utils.generate_notification(None, 'qonos.job.run.start',
                                mox.IsA(dict), mox.IsA(str))
    utils.generate_notification(None, 'qonos.job.update',
                                mox.IsA(dict),
                                mox.IsA(str)).MultipleTimes()
    utils.generate_notification(None, 'qonos.job.run.end',
                                mox.IsA(dict), mox.IsA(str))
    self.worker.update_job(fakes.JOB_ID, 'DONE', timeout=None,
                           error_message=None)
    self.mox.ReplayAll()

    processor = TestableSnapshotProcessor(self.nova_client)
    processor.init_processor(self.worker)
    processor.process_job(job)

    self.mox.VerifyAll()
def _create_images_list(self, instance_id, image_count):
    """Build ``image_count`` images dated one day apart.

    The first image is dated at the current time and each following
    image one day earlier than the previous one.
    """
    newest = timeutils.utcnow()
    one_day = datetime.timedelta(days=1)
    return [
        self._create_image(instance_id, newest - one_day * age_in_days)
        for age_in_days in range(image_count)
    ]
def test_get_next_job_too_many_retries(self):
    """The job with retry_count 3 is passed over for the second job."""
    new_timeout = timeutils.utcnow() + datetime.timedelta(hours=3)
    now = timeutils.utcnow()
    retries = 2
    self.job_fixture_2.update({
        'retry_count': 3,
        'timeout': now + datetime.timedelta(seconds=5),
    })
    self._create_jobs(10, self.job_fixture_2, self.job_fixture_1)

    job = db_api.job_get_and_assign_next_by_action(
        'snapshot', unit_utils.WORKER_UUID1, retries, new_timeout)

    # self.jobs[1] is the job created from job_fixture_1.
    expected = self.jobs[1]
    self.assertEqual(job['id'], expected['id'])
    self.assertEqual(job['worker_id'], unit_utils.WORKER_UUID1)
    self.assertEqual(job['timeout'], new_timeout)
    self.assertEqual(job['hard_timeout'], expected['hard_timeout'])
    self.assertEqual(job['retry_count'], expected['retry_count'] + 1)
def schedule_to_next_run(schedule, start_time=None):
    """Return the next run time for ``schedule`` after ``start_time``.

    Cron fields missing from ``schedule`` default to '*'. A falsy
    ``start_time`` is replaced by the current time.
    """
    if not start_time:
        start_time = timeutils.utcnow()
    field_names = ('minute', 'hour', 'day_of_month', 'month', 'day_of_week')
    cron_args = [schedule.get(name, '*') for name in field_names]
    cron_args.append(start_time)
    # Passed positionally: minute, hour, day_of_month, month,
    # day_of_week, start_time.
    return utils.cron_string_to_next_datetime(*cron_args)
def _assign_jobs_for_concurrent_workers(self, workers):
    """Request a 'snapshot' job for each of the first two workers.

    Returns a ``(worker1_job, worker2_job)`` tuple.
    """
    three_hours = datetime.timedelta(hours=3)
    new_timeout = timeutils.utcnow() + three_hours

    first_job = base.db_api.job_get_and_assign_next_by_action(
        'snapshot', workers[0], new_timeout)
    second_job = base.db_api.job_get_and_assign_next_by_action(
        'snapshot', workers[1], new_timeout)
    return first_job, second_job
def _create_jobs(self):
    """Create four snapshot jobs and keep them as job_1 .. job_4.

    job_1 has no worker, retry_count 0 and no metadata; the other three
    are assigned to WORKER_UUID2 with retry_count 1 and an 'instance_id'
    metadata entry.
    """
    now = timeutils.utcnow()
    timeout = now + datetime.timedelta(hours=1)
    hard_timeout = now + datetime.timedelta(hours=4)

    def _fixture(job_id, schedule, tenant, worker_id, retry_count):
        return {
            "id": job_id,
            "schedule_id": schedule["id"],
            "tenant": tenant,
            "worker_id": worker_id,
            "action": "snapshot",
            "status": None,
            "timeout": timeout,
            "hard_timeout": hard_timeout,
            "retry_count": retry_count,
        }

    self.job_1 = db_api.job_create(
        _fixture(unit_utils.JOB_UUID1, self.schedule_1,
                 unit_utils.TENANT1, None, 0))

    assigned_jobs = (
        ("job_2", unit_utils.JOB_UUID2, self.schedule_2, unit_utils.TENANT2),
        ("job_3", unit_utils.JOB_UUID3, self.schedule_3, unit_utils.TENANT3),
        ("job_4", unit_utils.JOB_UUID4, self.schedule_4, unit_utils.TENANT4),
    )
    for attr_name, job_id, schedule, tenant in assigned_jobs:
        fixture = _fixture(job_id, schedule, tenant,
                           unit_utils.WORKER_UUID2, 1)
        # A fresh metadata list per job, as in the hand-written fixtures.
        fixture["job_metadata"] = [
            {"key": "instance_id", "value": "my_instance"},
        ]
        setattr(self, attr_name, db_api.job_create(fixture))
def _jobs_cleanup_hard_timed_out():
    """Delete every job whose hard_timeout is at or before now.

    Returns the value reported by the query's ``delete()`` call.
    """
    now = timeutils.utcnow()
    session = get_session()
    has_expired = models.Job.hard_timeout <= now
    num_del = session.query(models.Job).filter(has_expired).delete()
    session.flush()
    return num_del
def _create_basic_job(self):
    """Create and return a minimal snapshot job for TENANT1.

    The timeout is one hour and the hard timeout four hours from now.
    """
    now = timeutils.utcnow()
    job_values = {
        'action': 'snapshot',
        'timeout': now + datetime.timedelta(hours=1),
        'hard_timeout': now + datetime.timedelta(hours=4),
        'tenant': unit_utils.TENANT1,
    }
    return db_api.job_create(job_values)
def cron_string_to_next_datetime(minute="*", hour="*", day_of_month="*",
                                 month="*", day_of_week="*",
                                 start_time=None):
    """Return the next datetime matching the given cron fields.

    Each field is passed through ``_default_if_none(field, '*')`` before
    the five-field cron string is built. A falsy ``start_time`` is
    replaced by the current time.
    """
    start_time = start_time or timeutils.utcnow()
    raw_fields = (minute, hour, day_of_month, month, day_of_week)
    cron_string = "%s %s %s %s %s" % tuple(
        _default_if_none(field, '*') for field in raw_fields)
    schedule_iter = croniter(cron_string, start_time)
    return schedule_iter.get_next(datetime.datetime)
def _after_cursor_execute(conn, cursor, statement, parameters,
                          context, executemany):
    """Log at debug level how long the statement took.

    The method name and start time are read from ``conn.info``. When no
    start time is stored, the elapsed time is logged as -1.0.
    """
    method = conn.info.get('query_method')
    start_time = conn.info.get('query_start_time')

    total = -1.0
    if start_time:
        total = (timeutils.utcnow() - start_time).total_seconds()

    LOG.debug("Query time for '%s': %f. Query statement: %s"
              % (method, total, statement))
def test_process_job_should_update_status_and_timestamp(self):
    """Processing a job that reaches ACTIVE ends with a DONE update."""
    base_time = timeutils.utcnow()
    # Offsets (minutes, seconds) from base_time returned by the clock.
    clock_offsets = [(0, 0), (0, 0), (0, 305), (60, 5), (60, 305)]
    time_seq = [
        base_time + datetime.timedelta(minutes=minutes, seconds=seconds)
        for minutes, seconds in clock_offsets
    ]
    timeutils.set_time_override_seq(time_seq)

    job = copy.deepcopy(self.job)
    job['timeout'] = base_time + datetime.timedelta(minutes=60)

    # Expected nova calls, recorded in order. Each expectation goes
    # through the full attribute chain so a fresh mock method is recorded.
    self.nova_client.servers.get(mox.IsA(str)).AndReturn(MockServer())
    self.nova_client.servers.create_image(
        mox.IsA(str), mox.IsA(str),
        self.snapshot_meta).AndReturn(IMAGE_ID)
    for image_status in ('QUEUED', 'SAVING', 'SAVING', 'ACTIVE'):
        self.nova_client.images.get(IMAGE_ID).AndReturn(
            MockImageStatus(image_status))

    mock_retention = MockRetention()
    self.nova_client.rax_scheduled_images_python_novaclient_ext.\
        get(mox.IsA(str)).AndReturn(mock_retention)

    # Expected worker updates and notifications.
    self._init_worker_mock()
    self.worker.update_job(fakes.JOB_ID, 'PROCESSING',
                           timeout=mox.IsA(datetime.datetime),
                           error_message=None)
    self.worker.update_job(fakes.JOB_ID, 'PROCESSING', timeout=None,
                           error_message=None)

    self.mox.StubOutWithMock(utils, 'generate_notification')
    utils.generate_notification(None, 'qonos.job.run.start',
                                mox.IsA(dict), mox.IsA(str))
    utils.generate_notification(None, 'qonos.job.update',
                                mox.IsA(dict),
                                mox.IsA(str)).MultipleTimes()
    utils.generate_notification(None, 'qonos.job.run.end',
                                mox.IsA(dict), mox.IsA(str))

    # The schedule is deleted through the qonos client before DONE.
    self.worker.get_qonos_client().AndReturn(self.qonos_client)
    self.qonos_client.delete_schedule(mox.IsA(str))
    self.worker.update_job(fakes.JOB_ID, 'DONE', timeout=None,
                           error_message=None)
    self.mox.ReplayAll()

    processor = TestableSnapshotProcessor(self.nova_client)
    processor.init_processor(self.worker)
    processor.process_job(job)

    self.mox.VerifyAll()
def _do_test_process_job_should_update_image_error(self, error_status):
    """Run a job whose image poll ends with ``error_status``.

    Expects four PROCESSING updates followed by one ERROR update that
    carries an error message.
    """
    base_time = timeutils.utcnow()
    time_seq = [base_time, base_time]
    time_seq.extend(
        base_time + datetime.timedelta(seconds=offset)
        for offset in (305, 605, 905, 1205, 1505))
    timeutils.set_time_override_seq(time_seq)

    job = copy.deepcopy(self.job)
    job['timeout'] = base_time + datetime.timedelta(minutes=60)

    # Expected nova calls, recorded in order. Each expectation goes
    # through the full attribute chain so a fresh mock method is recorded.
    self.nova_client.servers.get(mox.IsA(str)).AndReturn(MockServer())
    self.nova_client.servers.create_image(
        mox.IsA(str), mox.IsA(str),
        self.snapshot_meta).AndReturn(IMAGE_ID)
    for image_status in ('QUEUED', 'SAVING', 'SAVING', 'SAVING'):
        self.nova_client.images.get(IMAGE_ID).AndReturn(
            MockImageStatus(image_status))
    self.nova_client.images.get(IMAGE_ID).AndReturn(error_status)

    self._init_worker_mock()
    for _unused in range(4):
        self.worker.update_job(fakes.JOB_ID, 'PROCESSING', timeout=None,
                               error_message=None)
    self.worker.update_job(fakes.JOB_ID, 'ERROR', timeout=None,
                           error_message=mox.IsA(str))

    self.mox.StubOutWithMock(utils, 'generate_notification')
    utils.generate_notification(None, 'qonos.job.run.start',
                                mox.IsA(dict), mox.IsA(str))
    self.mox.ReplayAll()

    processor = TestableSnapshotProcessor(self.nova_client)
    processor.init_processor(self.worker)
    processor.process_job(job)

    self.mox.VerifyAll()
def test_process_job_should_exponentially_increates_timeout(self):
    """Run the image-error scenario twice, the second time as a retry.

    The second run happens with the clock moved 120 minutes ahead and
    the job marked as ERROR with retry_count 2.
    """
    status = MockImageStatus('ERROR')
    job = copy.deepcopy(self.job)

    # First pass: a fresh job ends in an image error.
    self._do_test_process_job_should_update_image_error(status, job=job)
    self._reset_mocks()

    # Second pass: same job, later clock, flagged as a retry.
    later = timeutils.utcnow() + datetime.timedelta(minutes=120)
    timeutils.clear_time_override()
    timeutils.set_time_override(later)
    job['status'] = 'ERROR'
    job['retry_count'] = 2
    self._do_test_process_job_should_update_image_error(
        status,
        include_create=False,
        include_queued=False,
        is_retry=True,
        job=job)
def job_get_and_assign_next_by_action(action, worker_id, max_retry,
                                      new_timeout):
    """Assign the next available job for ``action`` to ``worker_id``.

    The job's worker_id, timeout and retry_count are updated and saved.
    The outcome is logged with a '[JOB2WORKER]' prefix.

    Returns the job re-read by id, or None when no job is available or
    saving raised NoResultFound / StaleDataError.
    """
    now = timeutils.utcnow()
    session = get_session()
    job_ref = _job_get_next_by_action(session, now, action, max_retry)
    if not job_ref:
        return None

    job_id = job_ref['id']
    try:
        job_values = {
            'worker_id': worker_id,
            'timeout': new_timeout,
            'retry_count': job_ref['retry_count'] + 1,
        }
        job_ref.update(job_values)
        job_ref.save(session=session)
    except sa_orm.exc.NoResultFound:
        #In case the job was deleted during assignment return nothing
        log_args = {'worker_id': job_values['worker_id'], 'job_id': job_id}
        LOG.warn(_('[JOB2WORKER] NoResultFound:'
                   ' Could not assign the job to worker_id: %(worker_id)s'
                   ' NoResultFound for job_id: %(job_id)s.') % log_args)
        return None
    except sa_orm.exc.StaleDataError:
        #In case the job was picked up by another transaction return nothing
        log_args = {'worker_id': job_values['worker_id'], 'job_id': job_id}
        LOG.warn(_('[JOB2WORKER] StaleDataError:'
                   ' Could not assign the job to worker_id: %(worker_id)s'
                   ' Job already assigned to another worker,'
                   ' job_id: %(job_id)s.') % log_args)
        return None

    log_args = {'job_id': job_id, 'worker_id': job_values['worker_id']}
    LOG.info(_('[JOB2WORKER] Assigned Job: %(job_id)s'
               ' To Worker: %(worker_id)s') % log_args)
    return _job_get_by_id(job_id)
def test_polling_job_timeout_extension_with_max_retries(self):
    """Job updates extend the timeout until OutOfTimeException.

    The job is updated repeatedly, advancing the overridden clock by one
    extension each round, until OutOfTimeException is raised. The final
    next_timeout and timeout_count are then checked.
    """
    timeout_extension = 3600
    job_timeout_max_updates_count = 3
    worker_options = (
        {'job_timeout_extension_sec': timeout_extension},
        {'job_timeout_max_updates': job_timeout_max_updates_count},
        {'job_timeout_initial_value_sec': 10800},
    )
    for option in worker_options:
        self.config(group='snapshot_worker', **option)

    server = self.server_instance_fixture("INSTANCE_ID", "test")
    job = self.job_fixture(server.id)
    images = [
        self.image_fixture('IMAGE_ID', image_status, server.id)
        for image_status in ('QUEUED', 'SAVING', 'SAVING', 'SAVING')
    ]

    one_extension = datetime.timedelta(seconds=timeout_extension)
    now = timeutils.utcnow()
    timeutils.set_time_override(now)
    timeutils.advance_time_delta(one_extension)
    try:
        with TestableSnapshotProcessor(job, server, images) as p:
            p.next_timeout = now + p.initial_timeout
            p.next_update = now + p.update_interval

            #NOTE(venkatesh): unfortunately had to use a protected method
            # for testing. Else there seems to be no easier way to test
            # this scenario. we need to fix this as part of refactoring
            # SnapshotJobProcessor.
            while True:
                try:
                    p._update_job(job['id'], 'PROCESSING')
                except exception.OutOfTimeException:
                    break
                else:
                    timeutils.advance_time_delta(one_extension)

            total_timeout_duration = datetime.timedelta(
                seconds=(timeout_extension * job_timeout_max_updates_count))
            expected_timeout = now + (p.initial_timeout +
                                      total_timeout_duration)
            self.assertEqual(expected_timeout, p.next_timeout)
            self.assertEqual(3, p.timeout_count)
    finally:
        timeutils.clear_time_override()
def test_polling_job_timeout_extension_with_max_retries(self):
    """Job updates extend the timeout until OutOfTimeException.

    Checks that, once OutOfTimeException is raised, next_timeout equals
    the start time plus the initial timeout plus three extensions, and
    that timeout_count is 3.
    """
    timeout_extension = 3600
    job_timeout_max_updates_count = 3
    group = 'snapshot_worker'
    self.config(job_timeout_extension_sec=timeout_extension, group=group)
    self.config(job_timeout_max_updates=job_timeout_max_updates_count,
                group=group)
    self.config(job_timeout_initial_value_sec=10800, group=group)

    server = self.server_instance_fixture("INSTANCE_ID", "test")
    job = self.job_fixture(server.id)
    images = [self.image_fixture('IMAGE_ID', 'QUEUED', server.id)]
    for _unused in range(3):
        images.append(self.image_fixture('IMAGE_ID', 'SAVING', server.id))

    step = datetime.timedelta(seconds=timeout_extension)
    now = timeutils.utcnow()
    timeutils.set_time_override(now)
    timeutils.advance_time_delta(step)
    try:
        with TestableSnapshotProcessor(job, server, images) as p:
            p.next_timeout = now + p.initial_timeout
            p.next_update = now + p.update_interval

            # NOTE(venkatesh): unfortunately had to use a protected method
            # for testing. Else there seems to be no easier way to test
            # this scenario. we need to fix this as part of refactoring
            # SnapshotJobProcessor.
            out_of_time = False
            while not out_of_time:
                try:
                    p._update_job(job['id'], 'PROCESSING')
                except exception.OutOfTimeException:
                    out_of_time = True
                else:
                    timeutils.advance_time_delta(step)

            total_timeout_duration = datetime.timedelta(
                seconds=(timeout_extension * job_timeout_max_updates_count))
            self.assertEqual(
                now + (p.initial_timeout + total_timeout_duration),
                p.next_timeout)
            self.assertEqual(3, p.timeout_count)
    finally:
        timeutils.clear_time_override()
def _before_cursor_execute(conn, cursor, statement, parameters,
                           context, executemany):
    """Record the query start time and calling method on ``conn.info``.

    Stores the current time under 'query_start_time' and, under
    'query_method', the name of the first function on the call stack
    whose file path ends with 'sqlalchemy/api.py' ('unknown' when there
    is no such frame).
    """
    stack = inspect.stack()
    try:
        # NOTE(alaski): stack is a list of tuples like (_, filename, _,
        # method_name, _, _) where _ are irrelevant slots. The first entry
        # whose filename goes through sqlalchemy/api.py supplies the
        # method_name. next() over a generator stops at the first match
        # and does not depend on filter() returning a list.
        method = next(
            (frame_record[3] for frame_record in stack
             if frame_record[1].endswith('sqlalchemy/api.py')),
            'unknown')
    finally:
        # Frame records reference live frames; release them promptly to
        # avoid keeping reference cycles alive.
        del stack

    conn.info['query_start_time'] = timeutils.utcnow()
    conn.info['query_method'] = method
def cron_string_to_next_datetime(minute="*", hour="*", day_of_month="*",
                                 month="*", day_of_week="*",
                                 start_time=None):
    """Return the next datetime matching the given cron fields.

    None-valued fields are handed to ``_default_if_none`` with '*' as
    the default. A falsy ``start_time`` is replaced by the current time.
    """
    if not start_time:
        start_time = timeutils.utcnow()

    cron_fields = tuple(
        _default_if_none(field, '*')
        for field in (minute, hour, day_of_month, month, day_of_week))
    cron_string = "%s %s %s %s %s" % cron_fields

    cron = croniter(cron_string, start_time)
    return cron.get_next(datetime.datetime)
def _jobs_cleanup_hard_timed_out():
    """Delete every stored job whose hard_timeout has passed.

    A job is deleted only when its hard_timeout is strictly earlier than
    the current time.

    Returns the number of jobs deleted.
    """
    now = timeutils.utcnow()
    # NOTE(review): the SQLAlchemy variant of this cleanup filters with
    # `hard_timeout <= now`; this one is strict - confirm which is wanted.
    # Ids are collected before deleting so DATA['jobs'] is not iterated
    # while job_delete() runs (presumably it removes entries from it).
    del_ids = [job_id for job_id, job in DATA['jobs'].items()
               if job['hard_timeout'] < now]
    for job_id in del_ids:
        job_delete(job_id)
    return len(del_ids)
def test_process_job_should_exponentially_increase_timeout(self):
    """Run the image-error scenario twice, the second time as a retry.

    The second run happens with the clock moved 120 minutes ahead, the
    job marked as ERROR with retry_count 2, and its hard_timeout set a
    further 120 minutes past the new clock.
    """
    status = MockImageStatus('ERROR')
    job = copy.deepcopy(self.job)

    # First pass: a fresh job ends in an image error.
    self._do_test_process_job_should_update_image_error(status, job=job)
    self._reset_mocks()

    # Second pass: same job, later clock, flagged as a retry.
    two_hours = datetime.timedelta(minutes=120)
    new_now = timeutils.utcnow() + two_hours
    timeutils.clear_time_override()
    timeutils.set_time_override(new_now)
    job['status'] = 'ERROR'
    job['retry_count'] = 2
    job['hard_timeout'] = timeutils.strtime(at=(new_now + two_hours))
    self._do_test_process_job_should_update_image_error(
        status,
        include_create=False,
        include_queued=False,
        is_retry=True,
        job=job)
def test_notifications_for_cancelled_job_on_hard_timeout_reached(self):
    """An expired hard timeout yields start and failed notifications."""
    server = self.server_instance_fixture("INSTANCE_ID", "test")
    # The hard timeout expired four hours ago.
    four_hours_ago = timeutils.utcnow() - datetime.timedelta(hours=4)
    job = self.job_fixture(
        server.id, hard_timeout=timeutils.strtime(four_hours_ago))

    with TestableSnapshotProcessor(job, server, []) as processor:
        processor.process_job(job)

        self.assertEqual('HARD_TIMED_OUT', job['status'])
        start_event = ('qonos.job.run.start', 'INFO', 'QUEUED')
        failed_event = ('qonos.job.failed', 'ERROR', 'HARD_TIMED_OUT')
        self.assert_job_notification_events(processor,
                                            [start_event, failed_event])
def test_process_job_should_fail_if_hard_timed_out(self):
    """A job at its hard timeout is updated to HARD_TIMED_OUT.

    Also expects a 'qonos.job.failed' notification with the full job
    payload.
    """
    mox.Reset(self.worker)
    self.mox.StubOutWithMock(utils, 'generate_notification')

    # The hard timeout is exactly the current time.
    self.job['hard_timeout'] = timeutils.strtime(at=timeutils.utcnow())

    utils.generate_notification(None, 'qonos.job.run.start',
                                mox.IsA(dict), mox.IsA(str))

    update_result = {
        'status': 'HARD_TIMED_OUT',
        'timeout': self.job['timeout'],
    }
    self.worker.update_job(
        fakes.JOB_ID, 'HARD_TIMED_OUT', timeout=None,
        error_message=mox.IsA(str)).AndReturn(update_result)

    expected_job = {
        'status': 'HARD_TIMED_OUT',
        'hard_timeout': self.job['hard_timeout'],
        'created_at': self.job['created_at'],
        'modified_at': self.job['modified_at'],
        'retry_count': 1,
        'schedule_id': '33333333-3333-3333-3333-33333333',
        'worker_id': '11111111-1111-1111-1111-11111111',
        'timeout': self.job['timeout'],
        'action': 'snapshot',
        'id': '22222222-2222-2222-2222-22222222',
        'tenant': '44444444-4444-4444-4444-44444444',
        'metadata': {
            'instance_id': '55555555-5555-5555-5555-55555555',
        },
    }
    expected_payload = {'job': expected_job}
    utils.generate_notification(None, 'qonos.job.failed',
                                expected_payload, mox.IsA(str))
    self.mox.ReplayAll()

    processor = TestableSnapshotProcessor(self.nova_client)
    processor.init_processor(self.worker)
    processor.process_job(self.job)

    self.mox.VerifyAll()
def test_schedule_to_next_run_start_time(self):
    """An explicit start_time is forwarded to the cron helper as-is."""
    two_days = datetime.timedelta(2)
    expected_start_time = timeutils.utcnow() - two_days
    self.called = False

    def fake_next_datetime(min, h, dom, m, dow, start_time):
        # An empty schedule must produce all-wildcard cron fields.
        self.called = True
        for cron_field in (min, h, dom, m, dow):
            self.assertEqual(cron_field, '*')
        self.assertEqual(expected_start_time, start_time)

    self.stubs.Set(utils, 'cron_string_to_next_datetime',
                   fake_next_datetime)

    api_utils.schedule_to_next_run({}, expected_start_time)

    self.assertTrue(self.called)
def job_fixture(self, instance_id):
    """Return a QUEUED snapshot job dict for ``instance_id``.

    The timeout is one hour and the hard timeout four hours from now.
    """
    now = timeutils.utcnow()
    metadata = {
        'instance_id': instance_id,
        'value': 'my_instance',
    }
    return {
        'id': 'JOB_1',
        'schedule_id': 'SCH_1',
        'tenant': 'TENANT1',
        'worker_id': 'WORKER_1',
        'action': 'snapshot',
        'status': 'QUEUED',
        'timeout': now + datetime.timedelta(hours=1),
        'hard_timeout': now + datetime.timedelta(hours=4),
        'retry_count': 0,
        'metadata': metadata,
    }
def schedule_update(schedule_id, schedule_values):
    """Apply *schedule_values* to the stored schedule and return it.

    A 'schedule_metadata' entry, when present and not None, replaces the
    schedule's whole metadata set; every other key is merged into the
    schedule record, whose 'updated_at' is refreshed first. The caller's
    dict is not modified.

    Raises exception.NotFound if *schedule_id* is not in DATA['schedules'].
    """
    # Work on a copy so stripping the metadata key never touches the input.
    updates = schedule_values.copy()

    if schedule_id not in DATA['schedules']:
        raise exception.NotFound()

    new_metadata = updates.pop('schedule_metadata', None)

    if updates:
        record = DATA['schedules'][schedule_id]
        record['updated_at'] = timeutils.utcnow()
        record.update(updates)

    if new_metadata is not None:
        # Reset first: the supplied list replaces, rather than extends,
        # whatever metadata was stored before.
        DATA['schedule_metadata'][schedule_id] = {}
        for entry in new_metadata:
            schedule_meta_create(schedule_id, entry)

    return schedule_get_by_id(schedule_id)
def test_process_job_should_cancel_if_job_hard_timed_out(self):
    """A job whose hard timeout lies four hours in the past is cancelled.

    Checks that the only status update is HARD_TIMED_OUT, that the job dict
    carries that status, and that the recorded error message names the job
    id and its hard timeout.
    """
    server = self.server_instance_fixture("INSTANCE_ID", "test")
    four_hours_ago = timeutils.utcnow() - datetime.timedelta(hours=4)
    job = self.job_fixture(
        server.id, hard_timeout=timeutils.strtime(four_hours_ago))

    with TestableSnapshotProcessor(job, server, []) as processor:
        processor.process_job(job)

        self.assert_update_job_statuses(processor, ['HARD_TIMED_OUT'])
        self.assertEqual('HARD_TIMED_OUT', job['status'])

        message_fields = {
            'job_id': job['id'],
            'hard_timeout': job['hard_timeout'],
        }
        expected_error = ('Job %(job_id)s has reached/exceeded its'
                          ' hard timeout: %(hard_timeout)s.' % message_fields)

        self.assert_job_status_values(processor, {
            'status': 'HARD_TIMED_OUT',
            'error_message': expected_error,
        })
def _do_test_process_job_should_update_image_error(self, error_status,
                                                   include_create=True,
                                                   include_queued=True,
                                                   is_retry=False,
                                                   job=None):
    """Drive process_job through image polling that ends in *error_status*.

    Records the expected nova, worker and notification calls with mox,
    replays them against a TestableSnapshotProcessor and verifies them.

    error_status: object returned by the final ``images.get`` call.
    include_create: when true, expect the server lookup and create_image
        calls; also controls ``skip_metadata_update`` on the worker mock.
    include_queued: when true, the first image poll returns 'QUEUED'
        instead of 'SAVING'.
    is_retry: when true, expect 'qonos.job.retry' instead of
        'qonos.job.run.start' as the first notification.
    job: job dict to process; defaults to a deep copy of ``self.job``.
    """
    base_time = timeutils.utcnow()
    # Scripted clock: three reads at base_time, then five reads spaced
    # 300 seconds apart starting at +305s.
    time_seq = [
        base_time,
        base_time,
        base_time,
        base_time + datetime.timedelta(seconds=305),
        base_time + datetime.timedelta(seconds=605),
        base_time + datetime.timedelta(seconds=905),
        base_time + datetime.timedelta(seconds=1205),
        base_time + datetime.timedelta(seconds=1505),
    ]
    timeutils.set_time_override_seq(time_seq)

    if job is None:
        job = copy.deepcopy(self.job)

    # NOTE(review): nesting assumed (source was whitespace-collapsed) --
    # the timeout is set for caller-supplied jobs too; confirm.
    # One hour is beyond the last scripted clock value (+1505s).
    job['timeout'] = base_time + datetime.timedelta(minutes=60)

    if include_create:
        self.nova_client.servers.get(mox.IsA(str)).AndReturn(MockServer())
        self.nova_client.servers.create_image(
            mox.IsA(str), mox.IsA(str), self.snapshot_meta).AndReturn(IMAGE_ID)

    # NOTE(review): nesting assumed -- image polling is expected whether or
    # not include_create is set; confirm.
    if include_queued:
        self.nova_client.images.get(IMAGE_ID).AndReturn(
            MockImageStatus('QUEUED'))
    else:
        self.nova_client.images.get(IMAGE_ID).AndReturn(
            MockImageStatus('SAVING'))

    # Four more polls still saving, then the poll that reports the error.
    self.nova_client.images.get(IMAGE_ID).AndReturn(
        MockImageStatus('SAVING'))
    self.nova_client.images.get(IMAGE_ID).AndReturn(
        MockImageStatus('SAVING'))
    self.nova_client.images.get(IMAGE_ID).AndReturn(
        MockImageStatus('SAVING'))
    self.nova_client.images.get(IMAGE_ID).AndReturn(
        MockImageStatus('SAVING'))
    self.nova_client.images.get(IMAGE_ID).AndReturn(error_status)

    self._init_worker_mock(skip_metadata_update=(not include_create))

    # Four PROCESSING updates are expected before the final ERROR update.
    self.worker.update_job(fakes.JOB_ID, 'PROCESSING', timeout=None,
                           error_message=None)
    self.worker.update_job(fakes.JOB_ID, 'PROCESSING', timeout=None,
                           error_message=None)
    self.worker.update_job(fakes.JOB_ID, 'PROCESSING', timeout=None,
                           error_message=None)
    self.worker.update_job(fakes.JOB_ID, 'PROCESSING', timeout=None,
                           error_message=None)

    metadata = copy.copy(job['metadata'])
    metadata['image_id'] = IMAGE_ID

    # Expected new timeout: last scripted clock value plus the default
    # increment (minutes) scaled by the backoff factor raised to the
    # job's retry count.
    backoff_factor = DEFAULT_BACKOFF_FACTOR**job['retry_count']
    timeout = time_seq[-1] + datetime.timedelta(
        minutes=DEFAULT_TIMEOUT_INCR * backoff_factor)
    self.worker.update_job(fakes.JOB_ID,
                           'ERROR', timeout=timeout,
                           error_message=mox.IsA(unicode)).AndReturn({
                               'status': 'ERROR',
                               'timeout': job['timeout']
                           })

    self.mox.StubOutWithMock(utils, 'generate_notification')

    expected_job = copy.deepcopy(job)
    expected_job['status'] = 'ERROR'
    expected_job['metadata'] = metadata

    if not is_retry:
        utils.generate_notification(None, 'qonos.job.run.start',
                                    mox.IsA(dict), mox.IsA(str))
    else:
        utils.generate_notification(None, 'qonos.job.retry',
                                    mox.IsA(dict), mox.IsA(str))

    utils.generate_notification(None, 'qonos.job.update', mox.IsA(dict),
                                mox.IsA(str))

    def assert_job_payload(job_payload):
        # The error text itself is not compared: only its presence is
        # required, then it is dropped before the equality check.
        self.assertTrue('error_message' in job_payload['job'])
        del job_payload['job']['error_message']
        self.assertEquals({'job': expected_job}, job_payload)
        return True

    utils.generate_notification(None, 'qonos.job.update',
                                mox.Func(assert_job_payload), 'ERROR')

    self.mox.ReplayAll()

    processor = TestableSnapshotProcessor(self.nova_client)
    processor.init_processor(self.worker)
    processor.process_job(job)

    self.mox.VerifyAll()
def _get_utcnow(self):
    """Return ``timeutils.utcnow()`` after echoing the value to stdout."""
    now = timeutils.utcnow()
    # Single-argument parenthesized form: prints the same text under the
    # Python 2 print statement and is valid as the Python 3 print function.
    print("Returning NOW: %s" % str(now))
    return now
def create(self, request, body):
    """Create a QUEUED job from a schedule and advance the schedule.

    ``body`` must look like ``{'job': {'schedule_id': ..., ...}}``. An
    optional ``next_run`` (ISO 8601 string) in the job is compared against
    the schedule's stored next run by
    ``db_api.schedule_test_and_set_next_run``.

    Returns ``{'job': <created job>}`` after emitting a 'qonos.job.create'
    notification.

    Raises:
        webob.exc.HTTPBadRequest: body/job/schedule_id missing, or
            ``next_run`` fails to parse.
        webob.exc.HTTPNotFound: the schedule does not exist.
        webob.exc.HTTPConflict: the test-and-set on next run raised
            NotFound.
    """
    if (body is None or body.get('job') is None or
            body['job'].get('schedule_id') is None):
        raise webob.exc.HTTPBadRequest()
    job = body['job']

    try:
        schedule = self.db_api.schedule_get_by_id(job['schedule_id'])
    except exception.NotFound:
        raise webob.exc.HTTPNotFound()

    # Check integrity of schedule and update next run
    expected_next_run = job.get('next_run')
    if expected_next_run:
        try:
            expected_next_run = timeutils.parse_isotime(expected_next_run)
            # Drop tzinfo so the value is naive, like next_run below.
            expected_next_run = expected_next_run.replace(tzinfo=None)
        except ValueError:
            msg = _('Invalid "next_run" value. Must be ISO 8601 format')
            raise webob.exc.HTTPBadRequest(explanation=msg)

    next_run = api_utils.schedule_to_next_run(schedule, timeutils.utcnow())
    next_run = next_run.replace(tzinfo=None)
    try:
        self.db_api.schedule_test_and_set_next_run(
            schedule['id'], expected_next_run, next_run)
    except exception.NotFound:
        # The last fragment starts with a space; without it the message
        # read "...either changedor has already...".
        msg = _("Specified next run does not match the current next run"
                " value. This could mean schedule has either changed"
                " or has already been scheduled since you last expected.")
        raise webob.exc.HTTPConflict(explanation=msg)

    # Update schedule last_scheduled
    self.db_api.schedule_update(
        schedule['id'], {'last_scheduled': timeutils.utcnow()})

    # Create job: start from the request's job fields, then force the
    # tenant/action from the schedule and the initial status.
    values = {}
    values.update(job)
    values['tenant'] = schedule['tenant']
    values['action'] = schedule['action']
    values['status'] = 'QUEUED'

    # Copy only key/value from each schedule metadata entry.
    values['job_metadata'] = [
        {'key': metadata['key'], 'value': metadata['value']}
        for metadata in schedule['schedule_metadata']
    ]

    job_action = values['action']
    # NOTE(review): hard_timeout is defaulted only together with timeout;
    # a request that supplies 'timeout' keeps whatever 'hard_timeout' it
    # sent (possibly none) -- confirm this is intended.
    if 'timeout' not in values:
        values['timeout'] = api_utils.get_new_timeout_by_action(job_action)
        values['hard_timeout'] = \
            api_utils.get_new_timeout_by_action(job_action)

    job = self.db_api.job_create(values)
    utils.serialize_datetimes(job)
    api_utils.serialize_job_metadata(job)
    job = {'job': job}
    utils.generate_notification(None, 'qonos.job.create', job, 'INFO')
    return job
def _get_utcnow(self):
    """Return the value reported by ``timeutils.utcnow()``.

    Kept as a method so the time source can be replaced per instance.
    """
    current_time = timeutils.utcnow()
    return current_time