def _save_coverage_information(context, result):
  """Saves coverage information in datastore using an atomic transaction."""

  # Use ndb.transaction with retries below to mitigate risk of a race
  # condition.
  def _try_save_coverage_information():
    """Implements save_coverage_information function."""
    coverage_info = data_handler.get_coverage_information(
        context.fuzz_target.project_qualified_name(),
        result.coverage_info.date,
        create_if_needed=True)

    # Intentionally skip edge and function coverage values as those would come
    # from fuzzer coverage cron task (see src/go/server/cron/coverage.go).
    # Copy the corpus/quarantine fields from the pruning result onto the
    # stored entity, attribute by attribute.
    for field in ('corpus_size_units', 'corpus_size_bytes', 'corpus_location',
                  'corpus_backup_location', 'quarantine_size_units',
                  'quarantine_size_bytes', 'quarantine_location'):
      setattr(coverage_info, field, getattr(result.coverage_info, field))

    coverage_info.put()

  try:
    ndb.transaction(
        _try_save_coverage_information,
        retries=data_handler.DEFAULT_FAIL_RETRIES)
  except Exception as e:
    raise CorpusPruningException(
        "Failed to save corpus pruning result: %s." % str(e))
def acquire_lock(key_name,
                 max_hold_seconds=DEFAULT_MAX_HOLD_SECONDS,
                 retries=None,
                 by_zone=True):
  """Acquire a lock for the given key name.

  Returns the expiration time if succeeded, otherwise None. The lock holder
  is responsible for making sure it doesn't assume the lock is still held
  after the expiration time.
  """
  logs.log('Acquiring lock for %s.' % key_name)
  failed_acquires = 0
  total_wait = 0
  wait_exponent = 1

  if by_zone:
    key_name_with_zone = _get_key_name_with_lock_zone(key_name)
    if key_name_with_zone is None:
      logs.log_error('Failed to get zone while trying to lock %s.' % key_name)
      return None

    key_name = key_name_with_zone

  bot_name = environment.get_value('BOT_NAME')
  expiration_delta = datetime.timedelta(seconds=max_hold_seconds)

  while total_wait < LOCK_CHECK_TIMEOUT:
    try:
      lock_entity = ndb.transaction(
          lambda: _try_acquire_lock(
              key_name,
              expiration_time=datetime.datetime.utcnow() + expiration_delta,
              holder=bot_name),
          retries=TRANSACTION_RETRIES)

      if lock_entity.holder == bot_name:
        _update_lock_statistics(
            key_name,
            acquires=1,
            failed_acquires=failed_acquires,
            wait_time=int(total_wait))
        logs.log('Got the lock.')
        return lock_entity.expiration_time
    except datastore_errors.TransactionFailedError:
      pass

    failed_acquires += 1
    # Bug fix: the previous condition (`retries >= failed_acquires`) was
    # inverted — it bailed out on the very first failed attempt whenever
    # `retries` was set. We should only give up once the number of failed
    # attempts actually exceeds the allowed retries.
    if retries and failed_acquires > retries:
      logs.log('Failed to acquire lock, exceeded max retries.')
      return None

    logs.log('Failed to acquire lock, waiting...')

    # Exponential backoff.
    max_sleep = (1 << wait_exponent) * LOCK_CHECK_SLEEP_MULTIPLIER
    sleep_time = random.uniform(1.0, max_sleep)
    time.sleep(sleep_time)

    total_wait += sleep_time
    wait_exponent = min(wait_exponent + 1, MAX_WAIT_EXPONENT)

  logs.log('Timeout exceeded while trying to acquire lock, bailing.')
  _update_lock_statistics(key_name, bails=1, failed_acquires=failed_acquires)
  return None
def update_task_status(task_name, status, expiry_interval=None):
  """Updates status for a task. Used to ensure that a single instance of a
  task is running at any given time."""
  bot_name = environment.get_value('BOT_NAME')
  failure_wait_interval = environment.get_value('FAIL_WAIT')

  # Fall back to our task lease interval when no explicit expiry was given.
  if expiry_interval is None:
    expiry_interval = environment.get_value('TASK_LEASE_SECONDS')
    if expiry_interval is None:
      logs.log_error('expiry_interval is None and TASK_LEASE_SECONDS not set.')

  def _attempt_status_update():
    """Transactionally update the task metadata; False if another bot owns it."""
    task_status = get_task_status(task_name, create_if_needed=True)

    # Another bot already started this task and its lease has not expired —
    # bail out with error.
    # NOTE(review): assumes the `expiry_interval - 1` shrink of the lease
    # window is intentional — confirm against callers.
    if (status == data_types.TaskState.STARTED and
        task_status.status == data_types.TaskState.STARTED and
        not dates.time_has_expired(
            task_status.time, seconds=expiry_interval - 1)):
      return False

    task_status.bot_name = bot_name
    task_status.status = status
    task_status.time = utils.utcnow()
    task_status.put()
    return True

  # It is important that we do not continue until the metadata is updated.
  # Skipping the update can lead to task loss, or to multiple bots attempting
  # to run the task at the same time.
  while True:
    try:
      return ndb.transaction(_attempt_status_update, retries=0)
    except Exception:
      # We need to update the status under all circumstances: failing to
      # record 'completed' causes another bot that picked up this job to
      # bail out.
      logs.log_error(
          'Unable to update %s task metadata. Retrying.' % task_name)
      time.sleep(utils.random_number(1, failure_wait_interval))