def purge_files(min_size, purgable_files): # we try to keep the cache as full as possible and defer deleting # any files until we can fullfil an allocation request completely # - since no request will start opportunisticly anyway. Also only # delete enough to ensure min_size bytes. bytes_freed = 0 bytes_freeable = sum(sf.size for sf in purgable_files) requests_to_delete = set() if min_size <= bytes_freeable: for sf in purgable_files: if bytes_freed >= min_size: break logger.debug('deleting %s' % sf) # raises OSError on any error except if not existing: if sf.staging_task: util.unlink(sf.staging_task.path_out) is_deleted = util.unlink(sf.path_staged()) if not is_deleted: logger.warn('tried purging staged file %s - but it is already ' 'gone' % sf) sf.state = 'offline' bytes_freed += sf.size requests_to_delete.update(set(sf.requests)) return bytes_freed, requests_to_delete
def fail_request(r, msg): r.state = 'failed' for t in staging_tasks_owned_by(r): logger.debug('revoking task %s belonging only to failed request %s' % \ (t, r)) t.cancel() notify_user(r, msg) mark_request_deletable.apply_async(args=[r.uuid], countdown=config.REQUEST_PINNING_TIME)
def create_mars_request(verb, file_name, target=None): sf = tasks.session.query(StagableFile).get(file_name) logger.debug('creating MARS request from %s' % sf) params = {} for l in sf.params.split(',\n'): k,v = l.split('=', 1) params[k.strip()] = v.strip() if target: params['target'] = target req = MarsRequest(verb, params) logger.debug('MARS request is: %s' % req) return req
def submit_sizing_tasks(r_uuid): files_unknown_size = tasks.session.query(StagableFile).\ join(StagableFile.requests).\ filter(DownloadRequest.uuid == r_uuid).\ filter(StagableFile.size == None).\ filter(StagableFile.sizing_task == None) logger.info('=> submitting sizing tasks for request %s: %s' % \ (r_uuid, ', '.join(sf.name for sf in files_unknown_size))) for sf in files_unknown_size: async_result = tasks.worker.estimate_size.delay(sf.name) sf.sizing_task = Task(async_result.id) logger.debug('submitted: %s' % sf.sizing_task) tasks.session.commit()
def join_staging_task(self, task_id): try: staging_task = get_task(task_id) logger.debug('staging task for %s is: %s' % (task_id, staging_task)) # BUG: FIXME: why is this a list and not a file, we do use uselist=False sf = staging_task.stagable_file[0] logger.info('staging task %s completed, file is %s' % (task_id, sf.name)) handle_staging_result(staging_task, sf) tasks.session.commit() except Exception, e: logger.error('join_staging_task(task_id=%s) unexpectedly failed (task ' 'id %s): %s, retrying in 60 s...' % (task_id, join_staging_task.request.id, str(e))) raise self.retry(exc=e, countdown=60)
def mark_request_deletable(self, r_uuid): try: r = tasks.session.query(DownloadRequest).get(r_uuid) logger.info('marking %s request %s as deletable' % (r.state, r_uuid)) r.is_deletable = True tasks.session.commit() # since there might be pending requests that wait for space to # be freed up: logger.debug('=> invoking scheduler') schedule_tasks.delay() except Exception, e: logger.error('mark_request_deletable(uuid=%s) unexpectedly failed ' '(task id %s): %s, retrying in 60 s...' % (r_uuid, mark_request_deletable.request.id, str(e))) raise self.retry(exc=e, countdown=60)
def estimate_size(self, file_name): try: logger.info('calling MARS to estimate size for %s' % file_name) size = None re_size = re.compile(r'^size=([\d]+);$') mars_request = create_mars_request(verb='LIST', file_name=file_name) mars_request.params['OUTPUT'] = 'COST' logger.debug('mars_request: %s' % mars_request) log_fn = logger.debug for rc,fd,l in util.exec_proc([ 'mars' ], logger, stdin=str(mars_request)): if fd == 2: log_fn = logger.warn if l is not None: log_fn(l.strip()) if size is None and fd == 1 and l is not None: m = re_size.match(l) if m: size = int(m.group(1).replace(",", "")) assert rc is not None and fd is None and l is None # don't trust size if mars returns non-zero if rc != 0 or size is None: logger.error('failed to compute size, rc = %d' % rc) logger.error('MARS request was: %s' % mars_request) raise TaskFailure('failed to compute size, rc = %d' % rc) elif size == 0: logger.error('size is %d' % size) logger.error('MARS request was: %s' % mars_request) raise HardTaskFailure('size is %d' % size) est_size = size * config.FILE_SIZE_WEIGHT + config.FILE_SIZE_EXTRA logger.debug('MARS reported size: %d bytes, after compensating: size * ' '%d + %d = %d' % \ (size, config.FILE_SIZE_WEIGHT, config.FILE_SIZE_EXTRA, est_size)) logger.info('size is %d' % est_size) return est_size except HardTaskFailure: raise except Exception, e: logger.error('estimate_size(file_name=%s) unexpectedly failed (task id ' '%s): %s, retrying in 60 s...' % (file_name, estimate_size.request.id, str(e))) raise self.retry(exc=e, countdown=60)
def register_request(self, uuid, openid, file_to_query): try: r = DownloadRequest(uuid, openid) logger.debug('created new request %s' % r) file_names = file_to_query.keys() registered_files = tasks.session.query(StagableFile).\ filter(StagableFile.name.in_(file_names)) logger.debug('registered files: %s' % registered_files.all()) new_file_names = set(file_names) - set(x.name for x in registered_files) logger.debug('new file names: %s' % new_file_names) new_files = [ StagableFile(file_name, file_to_query[file_name]) for file_name in new_file_names ] logger.debug('new files: %s' % new_files) registered_files.update( { StagableFile.request_count : StagableFile.request_count + 1 }, synchronize_session=False) r.files.extend(registered_files) r.files.extend(new_files) # This won't throw an integrityerror as long as this task is # single threaded. If not, there's always a risk that one or more # identical StagableFile may be inserted concurrently. tasks.session.add(r) tasks.session.commit() logger.info('registered request %s openid=%s, file_names=%s ' '(unregistered=%s)' % \ (r.uuid, openid, file_names, new_file_names)) submit_sizing_tasks(r.uuid) logger.debug('=> invoking scheduler') schedule_tasks.delay() except Exception, e: # Note that some errors will never complete OK, for example if # we try to register two or more requests with the same uuid # (unlikely) - these cases are not handled and will need # manual admin intervention (in this case removal of the # redundant requests) logger.error('register_request(uuid=%s, openid=%s, file_to_query=%s) ' 'unexpectedly failed (task id %s): %s, retrying in 60 s...' % (uuid, openid, file_to_query, register_request.request.id, str(e))) raise self.retry(exc=e, countdown=60)
def join_sizing_tasks(files): with allow_join_result(): for sf in files: try: assert sf.size is None, sf logger.debug('<= awaiting size estimation result for %s, ' 'task is %s' % (sf, sf.sizing_task)) if sf.sizing_task is None: # this means it failed before and the task got # unregistered from sf raise TaskFailure('size estimation has failed - a new task ' 'will be created on the next request ' 'containing this file') size = sf.sizing_task.get() logger.debug('size estimated to %d bytes' % size) assert size >= 0, size sf.size = size except Exception, e: logger.warning('size estimation task %s failed: %s' % \ (sf.sizing_task, e)) raise e finally:
def handle_staging_result(staging_task, sf): with allow_join_result(): logger.debug('getting staging result') try: staging_task.get() # returns None, but more importantly propagates any exception in the task except Exception, e: logger.warning('%s failed: %s' % (staging_task, e)) # consider resubmitting the task a limited amount since it # should only fail in rare cases (if so then do this by # raising a RetryException or similar here) logger.debug('deregistering %s from %s' % (sf.staging_task, sf)) tasks.session.delete(staging_task) sf.staging_task = None assert sf.staging_task is None, sf logger.debug('db commit') tasks.session.commit() for r in dispatched_requests(sf): logger.info('request %s failed' % r.uuid) fail_request(r, 'Staging of %s failed: %s' % (sf, str(e))) return # we deliberately only handle exceptions from the staging task, any other exception will propagate
def stage_file(self, file_name, target_dir, path_out): try: end_target = os.path.join(target_dir, file_name) tmp_target = os.path.join(target_dir, uuid.uuid4().get_hex()) logger.info('staging %s' % end_target) logger.debug('tmp_target: %s' % tmp_target) mars_request = create_mars_request(verb='RETRIEVE', file_name=file_name, target=tmp_target) logger.debug('mars_request: %s' % mars_request) with open(path_out, 'w') as f: for rc,fd,l in util.exec_proc([ 'mars' ], logger, stdin=str(mars_request)): if fd is not None and l is not None: if fd == 1: logger.debug('fd=%s, l=%s' % (fd, l.strip() if l else l)) else: logger.warning('fd=%s, l=%s' % (fd, l.strip() if l else l)) f.write(l) f.flush() f.flush() os.fdatasync(f.fileno()) f.close() if rc != 0: logger.error('failed to stage %s, rc = %d' % (file_name, rc)) logger.error('MARS request was: %s' % mars_request) logger.debug('removing temp file %s' % tmp_target) util.unlink(tmp_target) # FIXME: use try...finally raise TaskFailure('mars returned %d' % rc) logger.debug('moving temp file %s -> %s' % (tmp_target, end_target)) os.rename(tmp_target, end_target) logger.info('%s is staged online' % end_target) logger.debug('=> invoking scheduler') tasks.scheduler.join_staging_task.delay(stage_file.request.id) except Exception, e: logger.error('stage_file(file_name=%s, target_dir=%s, path_out=%s) ' 'unexpectedly failed (task id %s): %s, retrying in ' '60 s...' % (file_name, target_dir, path_out, stage_file.request.id, str(e))) raise self.retry(exc=e, countdown=60)
def schedule_tasks(self): try: dispatchable_requests = get_dispatchable_requests() logger.info('running scheduler - %d dispatchable request(s): %s' % \ ( dispatchable_requests.count(), ', '.join(r.uuid for r in dispatchable_requests))) reserved_files = get_reserved_files() online_files = get_online_files() reserved_space = sum(sf.size for sf in reserved_files) used_space = sum(sf.size for sf in online_files) available_space = config.STAGE_SPACE - used_space purgable_files = get_purgable_files() purgable_amount = sum(sf.size for sf in purgable_files) assert len(reserved_files.all()) == len(set(reserved_files)), \ reserved_files.all() assert used_space >= 0, used_space assert reserved_space >= 0, (reserved_files, reserved_space) logger.debug('reserved files: %s' % ', '.join(sf.name for sf in reserved_files)) logger.debug('online files: %s' % ', '.join(sf.name for sf in online_files)) logger.debug('purgable amount: %d bytes from %s' % \ (purgable_amount, ', '.join(sf.name for sf in purgable_files))) logger.info('total staging space: %d bytes, used: %d bytes, reserved: %d ' 'bytes. Max %d bytes available for new requests (%d bytes ' 'purgable)' % \ (config.STAGE_SPACE, used_space, reserved_space, config.STAGE_SPACE - reserved_space, purgable_amount)) dispatch_tasks = True num_tasks_dispatched = 0 num_tasks_failed = 0 num_tasks_deferred = 0 consumed_space = 0 for rs in dispatchable_requests: try: files_offline_not_being_staged = get_files_offline_not_being_staged(rs) logger.info('scheduling %s, available space %d bytes, offline files: %s' % \ (rs.uuid, available_space, ', '.join(sf.name for sf in files_offline_not_being_staged))) logger.debug('offline files: %s' % \ files_offline_not_being_staged.all()) logger.info('waiting for all size estimation tasks...') join_sizing_tasks(files_offline_not_being_staged.\ filter(StagableFile.size == None)) except Exception, e: # Note: all requests will keep on failing until this task has been resubmitted logger.warning('sizing estimation of %s failed: %s, request -> ' 'failed' % (rs.uuid, e)) fail_request(rs, 'sizing estimation of %s failed: %s' % (rs.uuid, e)) num_tasks_failed += 1 continue assert all(f.size is not None for f in files_offline_not_being_staged),\ files_offline_not_being_staged offline_size = sum(f.size for f in files_offline_not_being_staged) total_size = sum(f.size for f in rs.files) logger.info('total request size: %d bytes, %d offline bytes' % \ (total_size, offline_size)) # if staging fails any later request will we must make sure # either to fail any later requests for that file # fastforward all dispatchable zero cost requests and any # requests that is impossible to fulfill: # created -> failed, # created -> dispatched, # created -> finished: if total_size > config.STAGE_SPACE: logger.info('fast forwarding %s -> failed - since there is no way ' 'it can be fulfilled (needs %d of %d bytes ' 'available)' % \ (rs.uuid, total_size, config.STAGE_SPACE)) fail_request(rs, '%s can not be fulfilled (needs %d of %d bytes ' 'available)' % (rs.uuid, total_size, config.STAGE_SPACE)) num_tasks_failed += 1 elif not files_offline_not_being_staged.first(): if get_files_offline_being_staged(rs).first(): logger.info('fast forwarding %s -> dispatched - since it ' 'fully overlaps with staging tasks in ' 'progress' % rs.uuid) rs.state = 'dispatched' num_tasks_dispatched += 1 else: # this should not fail unless db is being concurrently # updated: assert all(x.state == 'online' for x in rs.files), rs logger.info('fast forwarding %s -> finished - since all ' 'files are online' % rs.uuid) finish_request(rs) num_tasks_dispatched += 1 elif dispatch_tasks: if offline_size > available_space: logger.info('%s requires more space than what is available ' '(%d > %d), initiating cache purging' % \ ( rs.uuid, offline_size, available_space )) required_amount = offline_size - available_space bytes_freed, requests_to_delete = purge_files(required_amount, purgable_files) available_space += bytes_freed logger.info('freed up %d/%d bytes (%d%%) - available space %d ' 'bytes' % \ ( bytes_freed, required_amount, int(round(bytes_freed / float(offline_size) * 100, 3)), available_space )) if requests_to_delete: assert all(x.is_deletable for x in requests_to_delete), requests_to_delete logger.info('deleting all deletable requests from db ' 'belonging to files that have been ' 'purged: %s' % \ ', '.join(x.uuid for x in requests_to_delete)) tasks.session.query(DownloadRequest).\ filter(DownloadRequest.uuid.in_(x.uuid for x in requests_to_delete)).\ delete(synchronize_session='fetch') if offline_size > available_space: logger.info('%d bytes needed but only %d bytes available -' ' not considering any further requests for task' ' dispatching until the next request has ' 'finished/failed' % \ (offline_size, available_space)) dispatch_tasks = False # schedule_tasks will be triggered upon next # request failure or deletion after finish num_tasks_deferred += 1 # optimisation possibility: we can also dispatch as many # tasks as there currently is room for even if the request # can not be fully staged yet (but that would also # complicate how we compute reserved/available_space) if dispatch_tasks: logger.info('staging offline files for request %s: %s' % \ (rs.uuid, ', '.join(sf.name for sf in files_offline_not_being_staged))) for sf in files_offline_not_being_staged: path_out = util.create_tempfile() # we'd like to chain the join_staging_tasks here # rather than calling it from the stage_file task, # but then we cannot register the async result # like this... async_result = tasks.worker.stage_file.delay(sf.name, sf.path, path_out) # note the critical window if worker dies here the # task may have started but will not be registered # with f (the same goes for submitting sizing # tasks). We choose risking more than one # identical staging task running (which is # harmless and has a low probability, though # wastes some resources) rather than risking # having the task registered with the file but not # actually running (which is harder to handle) # # (possibly create a celery task group consisting # of staging -> checksumming): sf.staging_task = Task(async_result.id, path_out) # IMPORTANT: commit immediately since stage_file # calls join_staging_tasks which looks up the task # in the db: tasks.session.commit() logger.info('=> staging task for %s is %s' % \ (sf.name, sf.staging_task.uuid)) consumed_space += offline_size available_space -= offline_size rs.state = 'dispatched' num_tasks_dispatched += 1 else: logger.info('request %s is not eliglible for dispatching since it ' 'can not be fast forwarded and at least one higher ' 'priority request is still waiting to be dispatched' % \ rs.uuid) num_tasks_deferred += 1 logger.info('scheduling iteration completed, %d tasks dispatched, %d ' 'tasks failed, %d tasks deferred. Consumed space %d bytes, ' 'total space available %d bytes' % \ (num_tasks_dispatched, num_tasks_failed, num_tasks_deferred, consumed_space, available_space)) tasks.session.commit()
if sf.sizing_task is None: # this means it failed before and the task got # unregistered from sf raise TaskFailure('size estimation has failed - a new task ' 'will be created on the next request ' 'containing this file') size = sf.sizing_task.get() logger.debug('size estimated to %d bytes' % size) assert size >= 0, size sf.size = size except Exception, e: logger.warning('size estimation task %s failed: %s' % \ (sf.sizing_task, e)) raise e finally: logger.debug('unregistering size estimation task %s' % \ (sf.sizing_task)) tasks.session.delete(sf.sizing_task) sf.sizing_task = None @tasks.cel.task(bind=True, acks_late=True, ignore_results=True) def mark_request_deletable(self, r_uuid): try: r = tasks.session.query(DownloadRequest).get(r_uuid) logger.info('marking %s request %s as deletable' % (r.state, r_uuid)) r.is_deletable = True tasks.session.commit() # since there might be pending requests that wait for space to # be freed up: logger.debug('=> invoking scheduler') schedule_tasks.delay()