def enqueue_task(taskqueue, key):
  """Enqueues a task for the specified task queue to process the given key.

  Args:
    taskqueue: Name of the task queue.
    key: ndb.Key to pass as a parameter to the task queue.
  """
  # The handler URL mirrors the queue name by convention.
  utils.enqueue_task(
      '/internal/queues/%s' % taskqueue,
      taskqueue,
      params={'key': key.urlsafe()},
  )
def post(self):
  """Enqueues cleanup tasks for ContentEntry entities expiring in a range.

  Reads 'start' and 'end' ISO datetimes from the JSON request body, pages
  through matching keys and enqueues one 'cleanup-expired' task per batch
  of 500 keys.
  """
  # Do not run for more than 9 minutes. Exceeding 10min hard limit causes 500.
  time_to_stop = time.time() + 9 * 60
  # Fix: keep the parsed request body in its own variable instead of reusing
  # 'data' for the per-batch payload below (the original shadowed it).
  request_data = json.loads(self.request.body)
  start = utils.parse_datetime(request_data['start'])
  end = utils.parse_datetime(request_data['end'])
  logging.info('Deleting between %s and %s', start, end)
  triggered = 0
  total = 0
  q = model.ContentEntry.query(
      model.ContentEntry.expiration_ts >= start,
      model.ContentEntry.expiration_ts < end)
  cursor = None
  more = True
  while more and time.time() < time_to_stop:
    # Since this query does not fetch the ContentEntry entities themselves,
    # we cannot easily compute the size of the data deleted.
    keys, cursor, more = q.fetch_page(500, start_cursor=cursor, keys_only=True)
    if not keys:
      break
    total += len(keys)
    payload = utils.encode_to_json([k.string_id() for k in keys])
    if utils.enqueue_task(
        '/internal/taskqueue/cleanup/expired', 'cleanup-expired',
        payload=payload):
      triggered += 1
    else:
      logging.warning('Failed to trigger task')
  logging.info('Triggered %d tasks for %d entries', triggered, total)
def schedule_catalog():
  """Enqueues tasks to catalog instances.

  Only uncataloged instances not pending deletion which belong to active
  instance group managers of active instance templates are considered.
  """
  for instance_template in models.InstanceTemplate.query():
    if not instance_template.active:
      continue
    instance_template_revision = instance_template.active.get()
    if not instance_template_revision:
      continue
    for instance_group_manager_key in instance_template_revision.active:
      instance_group_manager = instance_group_manager_key.get()
      if not instance_group_manager:
        continue
      for instance_key in instance_group_manager.instances:
        instance = instance_key.get()
        if not instance or instance.cataloged or instance.pending_deletion:
          continue
        if not utils.enqueue_task(
            '/internal/queues/catalog-instance',
            'catalog-instance',
            params={'key': instance.key.urlsafe()},
        ):
          logging.warning(
              'Failed to enqueue task for Instance: %s', instance.key)
def _maybe_pubsub_notify_via_tq(result_summary, request):
  """Examines result_summary and enqueues a task to send PubSub message.

  Must be called within a transaction.

  Raises CommitError on errors (to abort the transaction).
  """
  assert ndb.in_transaction()
  assert isinstance(
      result_summary, task_result.TaskResultSummary), result_summary
  assert isinstance(request, task_request.TaskRequest), request
  finished = result_summary.state in task_result.State.STATES_NOT_RUNNING
  if finished and request.pubsub_topic:
    task_id = task_pack.pack_result_summary_key(result_summary.key)
    payload = utils.encode_to_json({
        'task_id': task_id,
        'topic': request.pubsub_topic,
        'auth_token': request.pubsub_auth_token,
        'userdata': request.pubsub_userdata,
    })
    if not utils.enqueue_task(
        url='/internal/taskqueue/pubsub/%s' % task_id,
        queue_name='pubsub',
        transactional=True,
        payload=payload):
      raise datastore_utils.CommitError('Failed to enqueue task queue task')
def schedule_creation():
  """Enqueues tasks to create missing instance group managers."""
  # For each active InstanceGroupManager without a URL, schedule creation
  # of its instance group manager. Since we are outside a transaction the
  # InstanceGroupManager could be out of date and may already have a task
  # scheduled/completed. In either case it doesn't matter since we make
  # creating an instance group manager and updating the URL idempotent.
  for instance_template in models.InstanceTemplate.query():
    if not instance_template.active:
      continue
    revision = instance_template.active.get()
    if not revision or not revision.url:
      continue
    for instance_group_manager_key in revision.active:
      instance_group_manager = instance_group_manager_key.get()
      if not instance_group_manager or instance_group_manager.url:
        continue
      if not utils.enqueue_task(
          '/internal/queues/create-instance-group-manager',
          'create-instance-group-manager',
          params={'key': instance_group_manager_key.urlsafe()},
      ):
        logging.warning(
            'Failed to enqueue task for InstanceGroupManager: %s',
            instance_group_manager_key,
        )
def trigger_replication(auth_db_rev=None, transactional=False):
  """Enqueues a task to push auth db to replicas.

  Args:
    auth_db_rev: revision to push, if at the moment the task is executing
        current revision is different, the task will be skipped. By default
        uses a revision at the moment 'trigger_replication' is called.
    transactional: if True enqueue the task transactionally.

  Raises:
    ReplicationTriggerError on error.
  """
  if auth_db_rev is None:
    auth_db_rev = model.get_replication_state().auth_db_rev
  # Use explicit task queue call instead of 'deferred' module to route tasks
  # through WSGI app set up in backend/handlers.py. It has global state
  # correctly configured (ereporter config, etc). 'deferred' module uses its
  # own WSGI app. Task '/internal/taskqueue/replication/<rev>' translates
  # to a call to 'update_replicas_task(<rev>)'.
  enqueued = utils.enqueue_task(
      url='/internal/taskqueue/replication/%d' % auth_db_rev,
      queue_name='replication',
      transactional=transactional)
  if not enqueued:
    raise ReplicationTriggerError()
def _assert_task_props(properties, expiration_ts):
  """Asserts a TaskDimensions for a specific TaskProperties.

  Implementation of assert_task().

  Raises:
    Error: if the rebuild-task-cache task could not be enqueued.
  """
  # TODO(maruel): Make it a tasklet.
  dimensions_hash = hash_dimensions(properties.dimensions)
  task_dims_key = _get_task_dims_key(dimensions_hash, properties.dimensions)
  task_dims = task_dims_key.get()
  if not task_dims:
    logging.info(
        'assert_task(%d): new request kind; triggering rebuild-task-cache',
        dimensions_hash)
  else:
    # Reduce the check to be 5~10 minutes earlier to help reduce an attack of
    # task queues when there's a strong on-going load of tasks happening. This
    # jitter is essentially removed from _ADVANCE window.
    jitter = datetime.timedelta(seconds=random.randint(5 * 60, 10 * 60))
    valid_until_ts = expiration_ts - jitter
    match = task_dims.match_request(properties.dimensions)
    if not match:
      logging.info(
          'assert_task(%d): failed to match the dimensions; triggering '
          'rebuild-task-cache', dimensions_hash)
    elif match.valid_until_ts >= valid_until_ts:
      # Cache hit. It is important to reconfirm the dimensions because a hash
      # can be conflicting.
      logging.debug('assert_task(%d): hit', dimensions_hash)
      return
    else:
      logging.info(
          'assert_task(%d): set.valid_until_ts(%s) < expected(%s); '
          'triggering rebuild-task-cache',
          dimensions_hash, match.valid_until_ts, valid_until_ts)
  payload = utils.encode_to_json({
      u'dimensions': properties.dimensions,
      u'dimensions_hash': str(dimensions_hash),
      u'valid_until_ts': expiration_ts + _ADVANCE,
  })
  # If this task specifies an 'id' value, updates the cache inline since we
  # know there's only one bot that can run it, so it won't take long. This
  # permits tasks like 'terminate' tasks to execute faster.
  if properties.dimensions.get(u'id'):
    rebuild_task_cache(payload)
    return
  # We can't use the request ID since the request was not stored yet, so embed
  # all the necessary information.
  if not utils.enqueue_task(
      '/internal/taskqueue/rebuild-task-cache',
      queue_name='rebuild-task-cache',
      payload=payload):
    logging.error(
        'Failed to enqueue TaskDimensions update %x', dimensions_hash)
    # Technically we'd want to raise a endpoints.InternalServerErrorException.
    # Raising anything that is not TypeError or ValueError is fine.
    raise Error('Failed to trigger task queue; please try again')
def schedule_poll():
  """Enqueues a task to poll for Pub/Sub messages."""
  enqueued = utils.enqueue_task(
      '/internal/queues/process-pubsub-messages',
      'process-pubsub-messages',
  )
  if not enqueued:
    logging.warning('Failed to enqueue task for Pub/Sub')
def _maybe_pubsub_notify_via_tq(result_summary, request):
  """Examines result_summary and enqueues a task to send PubSub message.

  Must be called within a transaction.

  Raises CommitError on errors (to abort the transaction).
  """
  assert ndb.in_transaction()
  assert isinstance(
      result_summary, task_result.TaskResultSummary), result_summary
  assert isinstance(request, task_request.TaskRequest), request
  if (result_summary.state in task_result.State.STATES_NOT_RUNNING and
      request.pubsub_topic):
    task_id = task_pack.pack_result_summary_key(result_summary.key)
    message = {
        'task_id': task_id,
        'topic': request.pubsub_topic,
        'auth_token': request.pubsub_auth_token,
        'userdata': request.pubsub_userdata,
    }
    enqueued = utils.enqueue_task(
        url='/internal/taskqueue/pubsub/%s' % task_id,
        queue_name='pubsub',
        transactional=True,
        payload=utils.encode_to_json(message))
    if not enqueued:
      raise datastore_utils.CommitError('Failed to enqueue task queue task')
def schedule_deleted_instance_cleanup():
  """Enqueues tasks to clean up deleted instances."""
  # Only delete entities for instances which were marked as deleted >10 minutes
  # ago. This is because there can be a race condition with the task queue that
  # detects new instances. At the start of the queue it may detect an instance
  # which gets deleted before it finishes, and at the end of the queue it may
  # incorrectly create an entity for that deleted instance. Since task queues
  # can take at most 10 minutes, we can avoid the race condition by deleting
  # only those entities referring to instances which were detected as having
  # been deleted >10 minutes ago. Here we use 20 minutes for safety.
  THRESHOLD = 60 * 20
  now = utils.utcnow()
  for instance in models.Instance.query():
    if not instance.deleted:
      continue
    # Fix: use total_seconds() instead of .seconds. timedelta.seconds only
    # holds the seconds component (0-86399), so an instance deleted more than
    # a day ago could incorrectly appear younger than the threshold.
    if (now - instance.last_updated).total_seconds() > THRESHOLD:
      if not utils.enqueue_task(
          '/internal/queues/cleanup-deleted-instance',
          'cleanup-deleted-instance',
          params={'key': instance.key.urlsafe()},
      ):
        logging.warning(
            'Failed to enqueue task for Instance: %s', instance.key)
def schedule_metadata_tasks():
  """Enqueues tasks relating to metadata updates."""
  # Some metadata tasks will abort if higher precedence tasks are in
  # progress. Avoid scheduling these tasks. The priority here is to
  # get the result of an in-progress metadata operation if one exists.
  for instance in models.Instance.query():
    if instance.active_metadata_update:
      if instance.active_metadata_update.url:
        # Check the in-progress metadata operation.
        queue = 'check-instance-metadata-operation'
      else:
        # Start a metadata operation.
        queue = 'update-instance-metadata'
    elif instance.pending_metadata_updates:
      # Compress a list of desired metadata updates.
      queue = 'compress-instance-metadata-updates'
    else:
      queue = None
    if not queue:
      continue
    if not utils.enqueue_task(
        '/internal/queues/%s' % queue,
        queue,
        params={'key': instance.key.urlsafe()},
    ):
      logging.warning('Failed to enqueue task for Instance: %s', instance.key)
def reclaim_machine(machine_key, reclamation_ts):
  """Attempts to reclaim the given machine.

  Args:
    machine_key: ndb.Key for a model.CatalogMachineEntry instance.
    reclamation_ts: datetime.datetime instance indicating when the machine was
      reclaimed.

  Returns:
    True if the machine was reclaimed, else False.

  Raises:
    TaskEnqueuingError: if the reclaim-machine task could not be enqueued.
  """
  machine = machine_key.get()
  if not machine:
    logging.warning('CatalogMachineEntry not found: %s', machine_key)
    # Fix: return False explicitly; the bare 'return' returned None while the
    # docstring documents a True/False result.
    return False
  logging.info('Attempting to reclaim CatalogMachineEntry:\n%s', machine)
  if machine.lease_expiration_ts is None:
    # This can reasonably happen if e.g. the lease was voluntarily given up.
    logging.warning('CatalogMachineEntry no longer leased:\n%s', machine)
    return False
  if reclamation_ts < machine.lease_expiration_ts:
    # This can reasonably happen if e.g. the lease duration was extended.
    logging.warning('CatalogMachineEntry no longer overdue:\n%s', machine)
    return False
  logging.info('Reclaiming CatalogMachineEntry:\n%s', machine)
  lease = models.LeaseRequest.get_by_id(machine.lease_id)
  hostname = lease.response.hostname
  lease.response.hostname = None
  params = {
      'hostname': hostname,
      'machine_key': machine.key.urlsafe(),
      'machine_subscription': machine.pubsub_subscription,
      'machine_subscription_project': machine.pubsub_subscription_project,
      'machine_topic': machine.pubsub_topic,
      'machine_topic_project': machine.pubsub_topic_project,
      'policies': protojson.encode_message(machine.policies),
      'request_json': protojson.encode_message(lease.request),
      'response_json': protojson.encode_message(lease.response),
  }
  backend_attributes = {}
  for attribute in machine.policies.backend_attributes:
    backend_attributes[attribute.key] = attribute.value
  params['backend_attributes'] = utils.encode_to_json(backend_attributes)
  if lease.request.pubsub_topic:
    params['lessee_project'] = lease.request.pubsub_project
    params['lessee_topic'] = lease.request.pubsub_topic
  if not utils.enqueue_task(
      '/internal/queues/reclaim-machine',
      'reclaim-machine',
      params=params,
      transactional=True,
  ):
    raise TaskEnqueuingError('reclaim-machine')
  return True
def lease_machine(machine_key, lease):
  """Attempts to lease the given machine.

  Args:
    machine_key: ndb.Key for a model.CatalogMachineEntry instance.
    lease: model.LeaseRequest instance.

  Returns:
    True if the machine was leased, otherwise False.

  Raises:
    TaskEnqueuingError: if the fulfillment task could not be enqueued.
  """
  machine = machine_key.get()
  lease = lease.key.get()
  logging.info('Attempting to lease matching CatalogMachineEntry:\n%s', machine)
  if not can_fulfill(machine, lease.request):
    logging.warning('CatalogMachineEntry no longer matches:\n%s', machine)
    return False
  if machine.state != models.CatalogMachineEntryStates.AVAILABLE:
    logging.warning('CatalogMachineEntry no longer available:\n%s', machine)
    return False
  if lease.response.state != rpc_messages.LeaseRequestState.UNTRIAGED:
    logging.warning('LeaseRequest no longer untriaged:\n%s', lease)
    return False
  if not machine.pubsub_subscription:
    logging.warning('CatalogMachineEntry not subscribed to Pub/Sub yet')
    return False

  logging.info('Leasing CatalogMachineEntry:\n%s', machine)
  now = utils.utcnow()
  expiration = now + datetime.timedelta(seconds=lease.request.duration)
  lease.leased_ts = now
  lease.machine_id = machine.key.id()
  lease.response.hostname = machine.dimensions.hostname
  # datetime_to_timestamp returns microseconds, which are too fine grain.
  lease.response.lease_expiration_ts = utils.datetime_to_timestamp(
      expiration) / 1000 / 1000
  lease.response.state = rpc_messages.LeaseRequestState.FULFILLED
  machine.lease_id = lease.key.id()
  machine.lease_expiration_ts = expiration
  machine.state = models.CatalogMachineEntryStates.LEASED
  ndb.put_multi([lease, machine])
  params = {
      'policies': protojson.encode_message(machine.policies),
      'request_json': protojson.encode_message(lease.request),
      'response_json': protojson.encode_message(lease.response),
      'machine_project': machine.pubsub_topic_project,
      'machine_topic': machine.pubsub_topic,
  }
  if not utils.enqueue_task(
      '/internal/queues/fulfill-lease-request',
      'fulfill-lease-request',
      params=params,
      transactional=True,
  ):
    raise TaskEnqueuingError('fulfill-lease-request')
  return True
def _set_executors_metrics(payload):
  """Reports per-bot tsmon metrics for one shard of BotInfo entities.

  Args:
    payload: serialized _ShardParams with the query cursor and counters.
  """
  params = _ShardParams(payload)
  query_iter = bot_management.BotInfo.query().iter(
      produce_cursors=True, start_cursor=params.cursor)
  processed = 0
  while query_iter.has_next():
    elapsed = (utils.utcnow() - params.start_time).total_seconds()
    if processed >= _EXECUTORS_PER_SHARD or elapsed > _REQUEST_TIMEOUT_SEC:
      # Out of budget for this shard: hand the remainder of the query off to
      # a follow-up task.
      params.cursor = query_iter.cursor_after()
      params.task_count += 1
      utils.enqueue_task(
          url='/internal/taskqueue/tsmon/executors',
          queue_name='tsmon',
          payload=params.json())
      params.task_count -= 1  # For accurate logging below.
      break
    params.count += 1
    processed += 1
    bot_info = query_iter.next()
    if bot_info.task_id:
      status = 'running'
    elif bot_info.quarantined:
      status = 'quarantined'
    elif bot_info.is_dead:
      status = 'dead'
    elif bot_info.state and bot_info.state.get('maintenance', False):
      status = 'maintenance'
    else:
      status = 'ready'
    target_fields = dict(_TARGET_FIELDS)
    target_fields['hostname'] = 'autogen:' + bot_info.id
    _executors_status.set(status, target_fields=target_fields)
    _executors_pool.set(
        _pool_from_dimensions(bot_info.dimensions),
        target_fields=target_fields)
  logging.debug(
      '%s: task %d started at %s, processed %d bots (%d total)',
      '_set_executors_metrics', params.task_count, params.task_start,
      processed, params.count)
def register_instance(self, package_name, instance_id, caller, now=None):
  """Makes new PackageInstance entity if it is not yet there.

  Caller must verify that package data is already uploaded to CAS (by using
  is_instance_file_uploaded method).

  Args:
    package_name: name of the package, e.g. 'infra/tools/cipd'.
    instance_id: identifier of the package instance (SHA1 of package file).
    caller: auth.Identity that issued the request.
    now: datetime when the request was made (or None for current time).

  Returns:
    Tuple (PackageInstance entity, True if registered or False if existed).
  """
  # Is PackageInstance already registered?
  key = package_instance_key(package_name, instance_id)
  existing = key.get()
  if existing is not None:
    return existing, False

  # Register Package entity if missing.
  now = now or utils.utcnow()
  pkg_key = package_key(package_name)
  if not pkg_key.get():
    Package(key=pkg_key, registered_by=caller, registered_ts=now).put()

  instance = PackageInstance(key=key, registered_by=caller, registered_ts=now)

  # Trigger post processing, if any.
  processors = [p.name for p in self.processors if p.should_process(instance)]
  if processors:
    # ID in the URL is FYI only, to see what's running now via admin UI.
    enqueued = utils.enqueue_task(
        url='/internal/taskqueue/cipd-process/%s' % instance_id,
        queue_name='cipd-process',
        payload=json.dumps({
            'package_name': package_name,
            'instance_id': instance_id,
            'processors': processors,
        }, sort_keys=True),
        transactional=True)
    if not enqueued:  # pragma: no cover
      raise datastore_errors.TransactionFailedError()

  # Store the instance, remember what processors have been triggered.
  instance.processors_pending = processors
  instance.put()
  return instance, True
def lease_machine(machine_key, lease):
  """Attempts to lease the given machine.

  Args:
    machine_key: ndb.Key for a model.CatalogMachineEntry instance.
    lease: model.LeaseRequest instance.

  Returns:
    True if the machine was leased, otherwise False.

  Raises:
    TaskEnqueuingError: if the fulfillment task could not be enqueued.
  """
  machine = machine_key.get()
  lease = lease.key.get()
  logging.info('Attempting to lease matching CatalogMachineEntry:\n%s', machine)
  if not can_fulfill(machine, lease.request):
    logging.warning('CatalogMachineEntry no longer matches:\n%s', machine)
    return False
  if machine.state != models.CatalogMachineEntryStates.AVAILABLE:
    logging.warning('CatalogMachineEntry no longer available:\n%s', machine)
    return False
  if lease.response.state != rpc_messages.LeaseRequestState.UNTRIAGED:
    logging.warning('LeaseRequest no longer untriaged:\n%s', lease)
    return False

  logging.info('Leasing CatalogMachineEntry:\n%s', machine)
  lease.leased_ts = utils.utcnow()
  expiry = lease.leased_ts + datetime.timedelta(seconds=lease.request.duration)
  lease.machine_id = machine.key.id()
  lease.response.hostname = machine.dimensions.hostname
  # datetime_to_timestamp returns microseconds, which are too fine grain.
  lease.response.lease_expiration_ts = utils.datetime_to_timestamp(
      expiry) / 1000 / 1000
  lease.response.state = rpc_messages.LeaseRequestState.FULFILLED
  machine.lease_id = lease.key.id()
  machine.lease_expiration_ts = expiry
  machine.state = models.CatalogMachineEntryStates.LEASED
  ndb.put_multi([lease, machine])
  params = {
      'policies': protojson.encode_message(machine.policies),
      'request_json': protojson.encode_message(lease.request),
      'response_json': protojson.encode_message(lease.response),
      'machine_project': machine.pubsub_topic_project,
      'machine_topic': machine.pubsub_topic,
  }
  if not utils.enqueue_task(
      '/internal/queues/fulfill-lease-request',
      'fulfill-lease-request',
      params=params,
      transactional=True,
  ):
    raise TaskEnqueuingError('fulfill-lease-request')
  return True
def schedule_pending_deletion():
  """Enqueues tasks to delete instances."""
  for instance in models.Instance.query():
    if not instance.pending_deletion:
      continue
    if not utils.enqueue_task(
        '/internal/queues/delete-instance-pending-deletion',
        'delete-instance-pending-deletion',
        params={'key': instance.key.urlsafe()},
    ):
      logging.warning('Failed to enqueue task for Instance: %s', instance.key)
def _enqueue_flush(flush_task, task_queue_name):
  """Enqueues a task that sends metrics."""
  # Task queue size limit is 100Kb, beware the size approaching the limit.
  payload = json.dumps(flush_task, sort_keys=True, separators=(',', ':'))
  if len(payload) >= _MAX_TASK_SIZE:
    logging.error(
        'Metrics push task payload is too big.\n%.1f Kb',
        len(payload) / 1024.0)
  enqueued = utils.enqueue_task(
      url='/internal/task/metrics/flush',
      queue_name=task_queue_name,
      payload=payload)
  if not enqueued:
    logging.error('Failed to enqueue a task to send metrics')
def schedule_deleted_instance_cleanup():
  """Enqueues tasks to clean up deleted instances."""
  for instance in models.Instance.query():
    if not instance.pending_deletion:
      continue
    if not utils.enqueue_task(
        '/internal/queues/cleanup-deleted-instance',
        'cleanup-deleted-instance',
        params={'key': instance.key.urlsafe()},
    ):
      logging.warning('Failed to enqueue task for Instance: %s', instance.key)
def schedule_lease_management():
  """Schedules task queues to process each MachineLease."""
  for machine_lease in MachineLease.query():
    enqueued = utils.enqueue_task(
        '/internal/taskqueue/machine-provider-manage',
        'machine-provider-manage',
        params={'key': machine_lease.key.urlsafe()},
    )
    if not enqueued:
      logging.warning(
          'Failed to enqueue task for MachineLease: %s', machine_lease.key)
def post(self):
  """Launches the requested mapreduce job and redirects to the status page."""
  job_id = self.request.get('job_id')
  assert job_id in mapreduce_jobs.MAPREDUCE_JOBS
  ok = utils.enqueue_task(
      url='/internal/taskqueue/mapreduce/launch/%s' % job_id,
      queue_name=mapreduce_jobs.MAPREDUCE_TASK_QUEUE,
      use_dedicated_module=False)
  # New tasks should show up on the status page.
  if not ok:
    self.abort(500, 'Failed to launch the job')
  self.redirect('/restricted/mapreduce/status')
def schedule_metadata_operations_check():
  """Enqueues tasks to check on metadata operations."""
  for instance in models.Instance.query():
    update = instance.active_metadata_update
    if not update or not update.url:
      continue
    if not utils.enqueue_task(
        '/internal/queues/check-instance-metadata-operation',
        'check-instance-metadata-operation',
        params={'key': instance.key.urlsafe()},
    ):
      logging.warning('Failed to enqueue task for Instance: %s', instance.key)
def schedule_pending_deletion():
  """Enqueues tasks to delete instances."""
  for instance in models.Instance.query():
    if not instance.pending_deletion or instance.deleted:
      continue
    if not utils.enqueue_task(
        '/internal/queues/delete-instance-pending-deletion',
        'delete-instance-pending-deletion',
        params={'key': instance.key.urlsafe()},
    ):
      logging.warning('Failed to enqueue task for Instance: %s', instance.key)
def schedule_metadata_compressions():
  """Enqueues tasks to compress instance metadata."""
  for instance in models.Instance.query():
    if not instance.pending_metadata_updates:
      continue
    if not utils.enqueue_task(
        '/internal/queues/compress-instance-metadata-updates',
        'compress-instance-metadata-updates',
        params={'key': instance.key.urlsafe()},
    ):
      logging.warning('Failed to enqueue task for Instance: %s', instance.key)
def _set_global_metrics():
  """Enqueues one tsmon task per global metrics endpoint."""
  for endpoint in ('jobs', 'executors', 'machine_types'):
    utils.enqueue_task(
        url='/internal/taskqueue/tsmon/%s' % endpoint, queue_name='tsmon')
def register_instance(self, package_name, instance_id, caller, now=None):
  """Makes new PackageInstance entity if it is not yet there.

  Caller must verify that package data is already uploaded to CAS (by using
  is_instance_file_uploaded method).

  Args:
    package_name: name of the package, e.g. 'infra/tools/cipd'.
    instance_id: identifier of the package instance (SHA1 of package file).
    caller: auth.Identity that issued the request.
    now: datetime when the request was made (or None for current time).

  Returns:
    Tuple (PackageInstance entity, True if registered or False if existed).
  """
  # Is PackageInstance already registered?
  key = package_instance_key(package_name, instance_id)
  found = key.get()
  if found is not None:
    return found, False

  # Register Package entity if missing.
  now = now or utils.utcnow()
  pkg_key = package_key(package_name)
  if not pkg_key.get():
    Package(key=pkg_key, registered_by=caller, registered_ts=now).put()

  inst = PackageInstance(key=key, registered_by=caller, registered_ts=now)

  # Trigger post processing, if any.
  processors = [
      proc.name for proc in self.processors if proc.should_process(inst)
  ]
  if processors:
    # ID in the URL is FYI only, to see what's running now via admin UI.
    success = utils.enqueue_task(
        url='/internal/taskqueue/cipd-process/%s' % instance_id,
        queue_name='cipd-process',
        payload=json.dumps({
            'package_name': package_name,
            'instance_id': instance_id,
            'processors': processors,
        }, sort_keys=True),
        transactional=True)
    if not success:  # pragma: no cover
      raise datastore_errors.TransactionFailedError()

  # Store the instance, remember what processors have been triggered.
  inst.processors_pending = processors
  inst.put()
  return inst, True
def run():
  """Moves the session to VERIFYING and enqueues a verification task."""
  refreshed = upload_session.key.get()
  if refreshed.status != UploadSession.STATUS_UPLOADING:  # pragma: no cover
    return refreshed
  enqueued = utils.enqueue_task(
      url='/internal/taskqueue/cas-verify/%d' % refreshed.key.id(),
      queue_name='cas-verify',
      transactional=True)
  if not enqueued:  # pragma: no cover
    raise datastore_errors.TransactionFailedError()
  refreshed.status = UploadSession.STATUS_VERIFYING
  refreshed.put()
  return refreshed
def _enqueue_flush(flush_task, task_queue_name):
  """Enqueues a task that sends metrics."""
  # Task queue size limit is 100Kb, beware the size approaching the limit.
  payload = json.dumps(flush_task, sort_keys=True, separators=(',', ':'))
  if len(payload) >= _MAX_TASK_SIZE:
    logging.error('Metrics push task payload is too big.\n%.1f Kb',
                  len(payload) / 1024.0)
  ok = utils.enqueue_task(
      url='/internal/task/metrics/flush',
      queue_name=task_queue_name,
      payload=payload)
  if not ok:
    logging.error('Failed to enqueue a task to send metrics')
def get(self, name):
  """Triggers the named cleanup job on the 'cleanup' task queue."""
  if name not in ('obliterate', 'old', 'orphaned', 'trim_lost'):
    self.abort(404, 'Unknown job')
  url = '/internal/taskqueue/cleanup/' + name
  # The push task queue name must be unique over a ~7 days period so use
  # the date at second precision, there's no point in triggering each of
  # time more than once a second anyway.
  suffix = utils.utcnow().strftime('%Y-%m-%d_%I-%M-%S')
  if utils.enqueue_task(url, 'cleanup', name=name + '_' + suffix):
    self.response.out.write('Triggered %s' % url)
  else:
    self.abort(500, 'Failed to enqueue a cleanup task, see logs')
def schedule_cataloged_instance_update():
  """Enqueues tasks to update information about cataloged instances."""
  for instance in models.Instance.query():
    if not instance.cataloged:
      continue
    if not utils.enqueue_task(
        '/internal/queues/update-cataloged-instance',
        'update-cataloged-instance',
        params={'key': instance.key.urlsafe()},
    ):
      logging.warning('Failed to enqueue task for Instance: %s', instance.key)
def schedule_metadata_updates():
  """Enqueues tasks to update instance metadata."""
  for instance in models.Instance.query():
    update = instance.active_metadata_update
    # Only instances with a pending (not yet started) metadata operation.
    if not update or update.url:
      continue
    if not utils.enqueue_task(
        '/internal/queues/update-instance-metadata',
        'update-instance-metadata',
        params={'key': instance.key.urlsafe()},
    ):
      logging.warning(
          'Failed to enqueue task for Instance: %s', instance.key)
def schedule_metadata_updates():
  """Enqueues tasks to update instance metadata."""
  for instance in models.Instance.query():
    active = instance.active_metadata_update
    if active and not active.url:
      enqueued = utils.enqueue_task(
          '/internal/queues/update-instance-metadata',
          'update-instance-metadata',
          params={'key': instance.key.urlsafe()},
      )
      if not enqueued:
        logging.warning(
            'Failed to enqueue task for Instance: %s', instance.key)
def schedule_deletion():
  """Enqueues tasks to delete drained instance templates."""
  for key in get_drained_instance_template_revisions():
    entity = key.get()
    if not entity or not entity.url or entity.active or entity.drained:
      continue
    if not utils.enqueue_task(
        '/internal/queues/delete-instance-template',
        'delete-instance-template',
        params={'key': key.urlsafe()},
    ):
      logging.warning(
          'Failed to enqueue task for InstanceTemplateRevision: %s', key)
def cron_update_named_caches():
  """Triggers one task queue task per pool to update NamedCache entities.

  Returns:
    Number of tasks successfully enqueued.
  """
  total = 0
  for pool in pools_config.known():
    enqueued = utils.enqueue_task(
        '/internal/taskqueue/important/named_cache/update-pool',
        'named-cache-task',
        payload=json.dumps({'pool': pool}),
    )
    if enqueued:
      logging.debug('Enqueued task for pool %s', pool)
      total += 1
    else:
      logging.error('Failed to enqueue task for pool %s', pool)
  return total
def create_subscription(machine_key):
  """Creates a Cloud Pub/Sub subscription for machine communication.

  Args:
    machine_key: ndb.Key for the machine whose subscription should be created.

  Raises:
    TaskEnqueuingError: if the subscribe-machine task could not be enqueued.
  """
  machine = machine_key.get()
  logging.info('Attempting to subscribe CatalogMachineEntry:\n%s', machine)
  if not machine:
    logging.warning('CatalogMachineEntry no longer exists: %s', machine_key)
    return
  if machine.state != models.CatalogMachineEntryStates.NEW:
    logging.warning('CatalogMachineEntry no longer new:\n%s', machine)
    return
  if machine.pubsub_subscription:
    logging.info('CatalogMachineEntry already subscribed:\n%s', machine)
    return
  machine.pubsub_subscription = 'subscription-%s' % machine.key.id()
  machine.pubsub_topic = 'topic-%s' % machine.key.id()
  params = {
      'backend_project': machine.policies.backend_project,
      'backend_topic': machine.policies.backend_topic,
      'hostname': machine.dimensions.hostname,
      'machine_id': machine.key.id(),
      'machine_service_account': machine.policies.machine_service_account,
      'machine_subscription': machine.pubsub_subscription,
      'machine_subscription_project': machine.pubsub_subscription_project,
      'machine_topic': machine.pubsub_topic,
      'machine_topic_project': machine.pubsub_topic_project,
  }
  params['backend_attributes'] = utils.encode_to_json(
      {a.key: a.value for a in machine.policies.backend_attributes})
  if not utils.enqueue_task(
      '/internal/queues/subscribe-machine',
      'subscribe-machine',
      params=params,
      transactional=True,
  ):
    raise TaskEnqueuingError('subscribe-machine')
  machine.state = models.CatalogMachineEntryStates.SUBSCRIBING
  machine.put()
def schedule_deletion():
  """Enqueues tasks to delete drained instance group managers."""
  for key in get_drained_instance_group_managers():
    entity = key.get()
    if not entity or not entity.url or entity.instances:
      continue
    if not utils.enqueue_task(
        '/internal/queues/delete-instance-group-manager',
        'delete-instance-group-manager',
        params={'key': key.urlsafe()},
    ):
      logging.warning(
          'Failed to enqueue task for InstanceGroupManager: %s', key)
def lease_machine(machine_key, lease):
  """Attempts to lease the given machine.

  Args:
    machine_key: ndb.Key for a model.CatalogMachineEntry instance.
    lease: model.LeaseRequest instance.

  Returns:
    True if the machine was leased, otherwise False.

  Raises:
    TaskEnqueuingError: if the fulfillment task could not be enqueued.
  """
  machine = machine_key.get()
  lease = lease.key.get()
  logging.info('Attempting to lease matching CatalogMachineEntry:\n%s', machine)
  if not can_fulfill(machine, lease.request):
    logging.warning('CatalogMachineEntry no longer matches:\n%s', machine)
    return False
  if machine.state != models.CatalogMachineEntryStates.AVAILABLE:
    logging.warning('CatalogMachineEntry no longer available:\n%s', machine)
    return False
  if lease.state != models.LeaseRequestStates.UNTRIAGED:
    logging.warning('LeaseRequest no longer untriaged:\n%s', lease)
    return False

  logging.info('Leasing CatalogMachineEntry:\n%s', machine)
  lease.leased_ts = utils.utcnow()
  lease.machine_id = machine.key.id()
  lease.state = models.LeaseRequestStates.FULFILLED
  machine.lease_id = lease.key.id()
  machine.lease_expiration_ts = lease.leased_ts + datetime.timedelta(
      seconds=lease.request.duration)
  machine.state = models.CatalogMachineEntryStates.LEASED
  ndb.put_multi([lease, machine])
  params = {'lease_id': lease.key.id(), 'machine_id': machine.key.id()}
  if lease.request.pubsub_topic:
    params['pubsub_project'] = lease.request.pubsub_project
    params['pubsub_topic'] = lease.request.pubsub_topic
  if not utils.enqueue_task(
      '/internal/queues/fulfill-lease-request',
      'fulfill-lease-request',
      params=params,
      transactional=True,
  ):
    raise TaskEnqueuingError('fulfill-lease-request')
  return True
def schedule_fetch():
  """Enqueues tasks to fetch instances."""
  for instance_group_manager in models.InstanceGroupManager.query():
    # Skip managers that have not been created yet (no URL).
    if not instance_group_manager.url:
      continue
    success = utils.enqueue_task(
        '/internal/queues/fetch-instances',
        'fetch-instances',
        params={
            'key': instance_group_manager.key.urlsafe(),
        },
    )
    if not success:
      logging.warning(
          'Failed to enqueue task for InstanceGroupManager: %s',
          instance_group_manager.key,
      )
def post(self):
  """Launches the requested mapreduce job, then shows the status page."""
  job_id = self.request.get('job_id')
  assert job_id in mapreduce_jobs.MAPREDUCE_JOBS
  # Do not use 'backend' module when running from dev appserver. Mapreduce
  # generates URLs that are incompatible with dev appserver URL routing when
  # using custom modules.
  launched = utils.enqueue_task(
      url='/internal/taskqueue/mapreduce/launch/%s' % job_id,
      queue_name=mapreduce_jobs.MAPREDUCE_TASK_QUEUE,
      use_dedicated_module=not utils.is_local_dev_server())
  if not launched:
    self.abort(500, 'Failed to launch the job')
  else:
    # New tasks should show up on the status page.
    self.redirect('/restricted/mapreduce/status')
def tag_existing(cls, collection):
  """Tag existing digests with new timestamp.

  Arguments:
    collection: a DigestCollection containing existing digests

  Returns:
    the enqueued task if there were existing entries; None otherwise
  """
  if not collection.items:
    return None
  url = '/internal/taskqueue/tag/%s/%s' % (
      collection.namespace.namespace,
      utils.datetime_to_timestamp(utils.utcnow()))
  # Pack the raw (binary) digests together as the task payload.
  payload = ''.join(
      binascii.unhexlify(digest.digest) for digest in collection.items)
  return utils.enqueue_task(url, 'tag', payload=payload)
def schedule_drained_deletion():
  """Enqueues tasks to delete drained instances."""
  for instance_group_manager_key in (
      instance_group_managers.get_drained_instance_group_managers()):
    instance_group_manager = instance_group_manager_key.get()
    if not instance_group_manager:
      continue
    for instance_key in instance_group_manager.instances:
      instance = instance_key.get()
      # Only uncataloged instances may be deleted directly.
      if not instance or instance.cataloged:
        continue
      enqueued = utils.enqueue_task(
          '/internal/queues/delete-drained-instance',
          'delete-drained-instance',
          params={
              'key': instance.key.urlsafe(),
          },
      )
      if not enqueued:
        logging.warning(
            'Failed to enqueue task for Instance: %s', instance.key)
def schedule_removal():
  """Enqueues tasks to remove drained instances from the catalog."""
  for instance_group_manager_key in (
      instance_group_managers.get_drained_instance_group_managers()):
    instance_group_manager = instance_group_manager_key.get()
    if not instance_group_manager:
      continue
    for instance_key in instance_group_manager.instances:
      instance = instance_key.get()
      # Only cataloged instances need removal from the catalog.
      if not instance or not instance.cataloged:
        continue
      enqueued = utils.enqueue_task(
          '/internal/queues/remove-cataloged-instance',
          'remove-cataloged-instance',
          params={
              'key': instance.key.urlsafe(),
          },
      )
      if not enqueued:
        logging.warning(
            'Failed to enqueue task for Instance: %s', instance.key)
def schedule_resize():
  """Enqueues tasks to resize instance group managers."""
  for instance_template in models.InstanceTemplate.query():
    if not instance_template.active:
      continue
    instance_template_revision = instance_template.active.get()
    if not instance_template_revision:
      continue
    for instance_group_manager_key in instance_template_revision.active:
      enqueued = utils.enqueue_task(
          '/internal/queues/resize-instance-group',
          'resize-instance-group',
          params={
              'key': instance_group_manager_key.urlsafe(),
          },
      )
      if not enqueued:
        logging.warning(
            'Failed to enqueue task for InstanceGroupManager: %s',
            instance_group_manager_key,
        )
def schedule_creation():
  """Enqueues tasks to create missing instance templates."""
  # For each active InstanceTemplateRevision without a URL, schedule
  # creation of its instance template. Since we are outside a transaction
  # the InstanceTemplateRevision could be out of date and may already have
  # a task scheduled/completed. In either case it doesn't matter since
  # we make creating an instance template and updating the URL idempotent.
  for instance_template in models.InstanceTemplate.query():
    if not instance_template.active:
      continue
    instance_template_revision = instance_template.active.get()
    if not instance_template_revision or instance_template_revision.url:
      continue
    enqueued = utils.enqueue_task(
        '/internal/queues/create-instance-template',
        'create-instance-template',
        params={
            'key': instance_template.active.urlsafe(),
        },
    )
    if not enqueued:
      logging.warning(
          'Failed to enqueue task for InstanceTemplateRevision: %s',
          instance_template.active,
      )
def reclaim_machine(machine_key, reclamation_ts):
  """Attempts to reclaim the given machine.

  Args:
    machine_key: ndb.Key for a model.CatalogMachineEntry instance.
    reclamation_ts: datetime.datetime instance indicating when the machine
      was reclaimed.

  Returns:
    True if the machine was reclaimed, else False.

  Raises:
    TaskEnqueuingError: if the reclaim-machine task could not be enqueued.
  """
  machine = machine_key.get()
  logging.info('Attempting to reclaim CatalogMachineEntry:\n%s', machine)

  if machine.lease_expiration_ts is None:
    # This can reasonably happen if e.g. the lease was voluntarily given up.
    logging.warning('CatalogMachineEntry no longer leased:\n%s', machine)
    return False

  if reclamation_ts < machine.lease_expiration_ts:
    # This can reasonably happen if e.g. the lease duration was extended.
    logging.warning('CatalogMachineEntry no longer overdue:\n%s', machine)
    return False

  logging.info('Reclaiming CatalogMachineEntry:\n%s', machine)
  lease = models.LeaseRequest.get_by_id(machine.lease_id)
  # Remember the hostname for the backend notification below, then sever the
  # lease <-> machine links on both entities.
  hostname = lease.response.hostname
  lease.machine_id = None
  lease.response.hostname = None
  machine.lease_id = None
  machine.lease_expiration_ts = None

  policy = machine.policies.on_reclamation
  if policy == rpc_messages.MachineReclamationPolicy.DELETE:
    logging.info('Executing MachineReclamationPolicy: DELETE')
    lease.put()
    machine.key.delete()
  else:
    if policy == rpc_messages.MachineReclamationPolicy.MAKE_AVAILABLE:
      logging.info('Executing MachineReclamationPolicy: MAKE_AVAILABLE')
      machine.state = models.CatalogMachineEntryStates.AVAILABLE
    else:
      if policy != rpc_messages.MachineReclamationPolicy.RECLAIM:
        # Something is awry. Log an error, but still reclaim the machine.
        # Fall back on the RECLAIM policy because it notifies the backend and
        # prevents the machine from being leased out again, but keeps it in
        # the Catalog in case we want to examine it further.
        logging.error(
            'Unexpected MachineReclamationPolicy: %s\nDefaulting to RECLAIM',
            policy,
        )
      else:
        logging.info('Executing MachineReclamationPolicy: RECLAIM')
      machine.state = models.CatalogMachineEntryStates.RECLAIMED
    # Persist both entities together (the DELETE branch above already wrote
    # its own changes).
    ndb.put_multi([lease, machine])

  params = {
      'hostname': hostname,
      'policies': protojson.encode_message(machine.policies),
      'request_json': protojson.encode_message(lease.request),
      'response_json': protojson.encode_message(lease.response),
  }
  # Flatten the repeated key/value backend attributes into a JSON dict.
  backend_attributes = {}
  for attribute in machine.policies.backend_attributes:
    backend_attributes[attribute.key] = attribute.value
  params['backend_attributes'] = utils.encode_to_json(backend_attributes)
  if lease.request.pubsub_topic:
    # Optional Pub/Sub notification target requested by the lessee.
    params['lessee_project'] = lease.request.pubsub_project
    params['lessee_topic'] = lease.request.pubsub_topic
  # transactional=True ties the task to the surrounding datastore
  # transaction — NOTE(review): this function appears intended to run inside
  # a transaction; confirm at the call site.
  if not utils.enqueue_task(
      '/internal/queues/reclaim-machine',
      'reclaim-machine',
      params=params,
      transactional=True,
  ):
    raise TaskEnqueuingError('reclaim-machine')
  return True
def create_instance_group(name, dimensions, policies, instances):
  """Stores an InstanceGroup and Instance entities in the datastore.

  Also attempts to catalog each running Instance in the Machine Provider.

  Operates on two root entities: model.Instance and model.InstanceGroup.

  Args:
    name: Name of this instance group.
    dimensions: machine_provider.Dimensions describing members of this
      instance group.
    policies: machine_provider.Policies governing members of this instance
      group.
    instances: Return value of gce.get_managed_instances listing instances
      in this instance group.
  """
  # Maps instance name -> the models.Instance entity to be written.
  instance_map = {}
  # Names of RUNNING instances that still need to be cataloged.
  instances_to_catalog = []

  for instance_name, instance in instances.iteritems():
    logging.info('Processing instance: %s', instance_name)
    instance_key = models.Instance.generate_key(instance_name)
    # Default every instance to UNCATALOGED; possibly upgraded below.
    instance_map[instance_name] = models.Instance(
        key=instance_key,
        group=name,
        name=instance_name,
        state=models.InstanceStates.UNCATALOGED,
    )
    if instance['instanceStatus'] == 'RUNNING':
      existing_instance = instance_key.get()
      if existing_instance:
        if existing_instance.state == models.InstanceStates.UNCATALOGED:
          logging.info('Attempting to catalog instance: %s', instance_name)
          instances_to_catalog.append(instance_name)
        else:
          # Preserve whatever state the datastore already recorded.
          logging.info('Skipping already cataloged instance: %s', instance_name)
          instance_map[instance_name].state = existing_instance.state
    else:
      logging.warning(
          'Instance not running: %s\ncurrentAction: %s\ninstanceStatus: %s',
          instance_name,
          instance['currentAction'],
          instance['instanceStatus'],
      )

  if instances_to_catalog:
    # Above we defaulted each instance to UNCATALOGED. Here, try to enqueue a
    # task to catalog them in the Machine Provider, setting CATALOGED if
    # successful.
    if utils.enqueue_task(
        '/internal/queues/catalog-instance-group',
        'catalog-instance-group',
        params={
            'dimensions': utils.encode_to_json(dimensions),
            'instances': utils.encode_to_json(instances_to_catalog),
            'policies': utils.encode_to_json(policies),
        },
        transactional=True,
    ):
      for instance_name in instances_to_catalog:
        instance_map[instance_name].state = models.InstanceStates.CATALOGED
  else:
    logging.info('Nothing to catalog')

  ndb.put_multi(instance_map.values())
  models.InstanceGroup.create_and_put(
      name, dimensions, policies, sorted(instance_map.keys()))
def tag_entries(entries, namespace):
  """Enqueues a task to update the timestamp for given entries."""
  timestamp = utils.datetime_to_timestamp(utils.utcnow())
  url = '/internal/taskqueue/tag/%s/%s' % (namespace, timestamp)
  # Concatenate the raw (binary) digests as the task payload.
  raw_digests = [binascii.unhexlify(e.digest) for e in entries]
  return utils.enqueue_task(url, 'tag', payload=''.join(raw_digests))